From d83b481f364cb182a2cd92da9ee845c8139de441 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 3 May 2026 06:29:02 +0000 Subject: [PATCH 01/32] feat(dispatch): turn-boundary batching dispatcher v2 per ADR v0.3 --- src/adapter.rs | 68 +++-- src/config.rs | 54 +++- src/cron.rs | 1 + src/discord.rs | 78 +++++- src/dispatch.rs | 724 ++++++++++++++++++++++++++++++++++++++++++++++++ src/gateway.rs | 95 ++++++- src/main.rs | 51 +++- src/slack.rs | 167 ++++++----- 8 files changed, 1116 insertions(+), 122 deletions(-) create mode 100644 src/dispatch.rs diff --git a/src/adapter.rs b/src/adapter.rs index f558fd07..51cf01cd 100644 --- a/src/adapter.rs +++ b/src/adapter.rs @@ -87,6 +87,10 @@ pub struct SenderContext { #[serde(skip_serializing_if = "Option::is_none")] pub thread_id: Option, pub is_bot: bool, + /// Platform message creation time (ISO 8601 UTC). + /// Discord/Slack: platform timestamp. Gateway: broker receive time (best-effort). + /// Additive field — schema stays openab.sender.v1. + pub timestamp: String, } // --- ChatAdapter trait --- @@ -160,6 +164,32 @@ impl AdapterRouter { &self.pool } + /// Access the reactions config (used by dispatch.rs). + pub fn reactions_config(&self) -> &ReactionsConfig { + &self.reactions_config + } + + /// Pack one arrival event into ContentBlocks using the uniform per-arrival template: + /// Text { "\n{json}\n\n\n{prompt}" } + /// [extra_blocks in arrival order] + /// + /// This is the single packing code path for both per-message and batched dispatch + /// (ADR §3.5). For a batch of N messages, call this N times and concatenate. + pub fn pack_arrival_event( + sender_json: &str, + prompt: &str, + extra_blocks: Vec, + ) -> Vec { + let header = format!( + "\n{}\n\n\n{}", + sender_json, prompt + ); + let mut blocks = Vec::with_capacity(1 + extra_blocks.len()); + blocks.push(ContentBlock::Text { text: header }); + blocks.extend(extra_blocks); + blocks + } + /// Handle an incoming user message. The adapter is responsible for /// filtering, resolving the thread, and building the SenderContext. /// This method handles sender context injection, session management, and streaming. @@ -176,28 +206,7 @@ impl AdapterRouter { ) -> Result<()> { tracing::debug!(platform = adapter.platform(), "processing message"); - // Build content blocks: sender context + prompt text, then extra (images, transcripts) - let prompt_with_sender = format!( - "\n{}\n\n\n{}", - sender_json, prompt - ); - - let mut content_blocks = Vec::with_capacity(1 + extra_blocks.len()); - // Prepend any transcript blocks (they go before the text block) - for block in &extra_blocks { - if matches!(block, ContentBlock::Text { .. }) { - content_blocks.push(block.clone()); - } - } - content_blocks.push(ContentBlock::Text { - text: prompt_with_sender, - }); - // Append non-text blocks (images) - for block in extra_blocks { - if !matches!(block, ContentBlock::Text { .. }) { - content_blocks.push(block); - } - } + let content_blocks = Self::pack_arrival_event(sender_json, prompt, extra_blocks); let thread_key = format!( "{}:{}", @@ -272,6 +281,21 @@ impl AdapterRouter { thread_channel: &ChannelRef, reactions: Arc, other_bot_present: bool, + ) -> Result<()> { + self.stream_prompt_blocks(adapter, thread_key, content_blocks, thread_channel, reactions, other_bot_present).await + } + + /// Drive one ACP turn with the given pre-packed ContentBlocks. + /// Called by both `handle_message` (per-message mode) and `dispatch::dispatch_batch` + /// (batched mode). 
+ pub async fn stream_prompt_blocks( + &self, + adapter: &Arc, + thread_key: &str, + content_blocks: Vec, + thread_channel: &ChannelRef, + reactions: Arc, + other_bot_present: bool, ) -> Result<()> { let adapter = adapter.clone(); let thread_channel = thread_channel.clone(); diff --git a/src/config.rs b/src/config.rs index 3bd99504..4978b1ad 100644 --- a/src/config.rs +++ b/src/config.rs @@ -4,6 +4,29 @@ use serde::Deserialize; use std::collections::HashMap; use std::path::Path; +/// Controls how incoming messages are dispatched to ACP turns. +/// +/// - `PerMessage` (default): each message becomes its own ACP turn (v0.8.2-beta.1 behaviour). +/// - `Batched`: messages that arrive while a turn is in flight are buffered and merged +/// into one ACP turn at the next turn boundary (see ADR: turn-boundary-batching-adr.md). +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub enum MessageProcessingMode { + #[default] + PerMessage, + Batched, +} + +impl<'de> Deserialize<'de> for MessageProcessingMode { + fn deserialize>(deserializer: D) -> Result { + let s = String::deserialize(deserializer)?; + match s.to_lowercase().replace('-', "_").as_str() { + "per_message" | "per-message" => Ok(Self::PerMessage), + "batched" => Ok(Self::Batched), + other => Err(serde::de::Error::unknown_variant(other, &["per-message", "batched"])), + } + } +} + /// Controls whether the bot processes messages from other Discord bots. /// /// Inspired by Hermes Agent's `DISCORD_ALLOW_BOTS` 3-value design: @@ -120,9 +143,20 @@ pub struct DiscordConfig { /// Default: false (opt-in). `allowed_users` still applies in DMs. #[serde(default)] pub allow_dm: bool, + /// Message dispatch mode. Default: per-message (v0.8.2-beta.1 behaviour). + #[serde(default)] + pub message_processing_mode: MessageProcessingMode, + /// Batched mode only: per-thread channel capacity. Default: 10. + #[serde(default = "default_max_buffered_messages")] + pub max_buffered_messages: usize, + /// Batched mode only: soft token cap for greedy drain. Default: 24000. + #[serde(default = "default_max_batch_tokens")] + pub max_batch_tokens: usize, } fn default_max_bot_turns() -> u32 { 20 } +fn default_max_buffered_messages() -> usize { 10 } +fn default_max_batch_tokens() -> usize { 24_000 } /// Controls whether the bot responds to user messages in threads without @mention. /// @@ -179,6 +213,15 @@ pub struct SlackConfig { /// Human message resets the counter. Default: 20. #[serde(default = "default_max_bot_turns")] pub max_bot_turns: u32, + /// Message dispatch mode. Default: per-message. + #[serde(default)] + pub message_processing_mode: MessageProcessingMode, + /// Batched mode only: per-thread channel capacity. Default: 10. + #[serde(default = "default_max_buffered_messages")] + pub max_buffered_messages: usize, + /// Batched mode only: soft token cap for greedy drain. Default: 24000. + #[serde(default = "default_max_batch_tokens")] + pub max_batch_tokens: usize, } #[derive(Debug, Deserialize)] @@ -205,6 +248,15 @@ pub struct GatewayConfig { /// Enable streaming (typewriter) mode — requires gateway platform to support message editing. #[serde(default)] pub streaming: bool, + /// Message dispatch mode. Default: per-message. + #[serde(default)] + pub message_processing_mode: MessageProcessingMode, + /// Batched mode only: per-thread channel capacity. Default: 10. + #[serde(default = "default_max_buffered_messages")] + pub max_buffered_messages: usize, + /// Batched mode only: soft token cap for greedy drain. Default: 24000. 
+ #[serde(default = "default_max_batch_tokens")] + pub max_batch_tokens: usize, } fn default_gateway_platform() -> String { @@ -285,7 +337,7 @@ impl<'de> Deserialize<'de> for ToolDisplay { } } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize)] pub struct ReactionsConfig { #[serde(default = "default_true")] pub enabled: bool, diff --git a/src/cron.rs b/src/cron.rs index ce5245ec..ffeda136 100644 --- a/src/cron.rs +++ b/src/cron.rs @@ -350,6 +350,7 @@ async fn fire_cronjob( channel_id: reply_channel.parent_id.as_deref().unwrap_or(&reply_channel.channel_id).to_string(), thread_id: reply_channel.thread_id.clone().or(Some(reply_channel.channel_id.clone())), is_bot: true, + timestamp: Utc::now().to_rfc3339(), }; let sender_json = match serde_json::to_string(&sender) { Ok(j) => j, diff --git a/src/discord.rs b/src/discord.rs index 67f3c7c8..b464b5ad 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -160,6 +160,9 @@ pub struct Handler { pub bot_turns: tokio::sync::Mutex, /// Allow the bot to respond to Discord DMs. pub allow_dm: bool, + /// Batched-mode dispatcher (None in per-message mode). + pub dispatcher: Option>, + pub message_processing_mode: crate::config::MessageProcessingMode, } impl Handler { @@ -527,6 +530,7 @@ impl EventHandler for Handler { &msg.channel_id.to_string(), thread_parent_id.as_deref(), msg.author.bot, + &msg.timestamp.to_rfc3339().unwrap_or_default(), ); // Build extra content blocks from attachments (audio → STT, text → inline, image → encode) @@ -622,7 +626,7 @@ impl EventHandler for Handler { let trigger_msg = discord_msg_ref(&msg); // Per-thread streaming: check if another bot is present in this thread - let other_bot_present = { + let other_bot_present_flag = { let cache = self.multibot_threads.lock().await; cache.contains_key(&msg.channel_id.to_string()) }; @@ -636,13 +640,52 @@ impl EventHandler for Handler { } let router = self.router.clone(); + let mode = self.message_processing_mode; + let dispatcher = self.dispatcher.clone(); + tokio::spawn(async move { let sender_json = serde_json::to_string(&sender).unwrap(); - if let Err(e) = router - .handle_message(&adapter, &thread_channel, &sender_json, &prompt, extra_blocks, &trigger_msg, other_bot_present) - .await - { - error!("handle_message error: {e}"); + match mode { + crate::config::MessageProcessingMode::PerMessage => { + if let Err(e) = router + .handle_message( + &adapter, + &thread_channel, + &sender_json, + &prompt, + extra_blocks, + &trigger_msg, + other_bot_present_flag, + ) + .await + { + error!("handle_message error: {e}"); + } + } + crate::config::MessageProcessingMode::Batched => { + if let Some(dispatcher) = dispatcher { + let thread_key = format!("discord:{}", thread_channel.channel_id); + let estimated_tokens = + crate::dispatch::estimate_tokens(&prompt, &extra_blocks); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks, + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present: other_bot_present_flag, + }; + if let Err(e) = dispatcher + .submit(thread_key, thread_channel, adapter, buf_msg) + .await + { + error!("dispatcher submit error: {e}"); + } + } else { + error!("batched mode enabled but no dispatcher configured"); + } + } } }); } @@ -862,10 +905,25 @@ impl Handler { cmd: &serenity::model::application::CommandInteraction, ) { let thread_key = format!("discord:{}", cmd.channel_id.get()); + + // Drop any messages buffered for this thread before resetting the session. 
+ // /reset semantics include /cancel-all: discard buffered work, then reset. + let dropped = self + .dispatcher + .as_ref() + .map(|d| d.cancel_buffered(&thread_key)) + .unwrap_or(0); + let result = self.router.pool().reset_session(&thread_key).await; let msg = match result { + Ok(()) if dropped > 0 => { + format!("🔄 Session reset. Dropped {dropped} buffered message(s). Start a new conversation!") + } Ok(()) => "🔄 Session reset. Start a new conversation!".to_string(), + Err(_) if dropped > 0 => { + format!("🔄 Dropped {dropped} buffered message(s). No active session to reset.") + } Err(_) => "⚠️ No active session to reset. Start a conversation first by @mentioning the bot.".to_string(), }; @@ -1104,6 +1162,7 @@ fn build_sender_context( msg_channel_id: &str, thread_parent_id: Option<&str>, is_bot: bool, + timestamp: &str, ) -> SenderContext { SenderContext { schema: "openab.sender.v1".into(), @@ -1114,6 +1173,7 @@ fn build_sender_context( channel_id: thread_parent_id.unwrap_or(msg_channel_id).to_string(), thread_id: thread_parent_id.map(|_| msg_channel_id.to_string()), is_bot, + timestamp: timestamp.to_string(), } } @@ -1438,7 +1498,7 @@ mod tests { /// In-thread message: channel_id = parent, thread_id = thread channel ID. #[test] fn build_sender_context_in_thread() { - let ctx = build_sender_context("user1", "alice", "Alice", "thread_ch", Some("parent_ch"), false); + let ctx = build_sender_context("user1", "alice", "Alice", "thread_ch", Some("parent_ch"), false, "2026-05-01T00:00:00Z"); assert_eq!(ctx.channel_id, "parent_ch"); assert_eq!(ctx.thread_id, Some("thread_ch".to_string())); assert_eq!(ctx.channel, "discord"); @@ -1449,7 +1509,7 @@ mod tests { /// Non-thread message: channel_id = message channel, thread_id = None. #[test] fn build_sender_context_not_in_thread() { - let ctx = build_sender_context("user1", "alice", "Alice", "main_ch", None, false); + let ctx = build_sender_context("user1", "alice", "Alice", "main_ch", None, false, "2026-05-01T00:00:00Z"); assert_eq!(ctx.channel_id, "main_ch"); assert_eq!(ctx.thread_id, None); } @@ -1457,7 +1517,7 @@ mod tests { /// Bot sender: is_bot flag propagated correctly. #[test] fn build_sender_context_bot_sender() { - let ctx = build_sender_context("bot1", "mybot", "MyBot", "ch", Some("parent"), true); + let ctx = build_sender_context("bot1", "mybot", "MyBot", "ch", Some("parent"), true, "2026-05-01T00:00:00Z"); assert!(ctx.is_bot); assert_eq!(ctx.channel_id, "parent"); assert_eq!(ctx.thread_id, Some("ch".to_string())); diff --git a/src/dispatch.rs b/src/dispatch.rs new file mode 100644 index 00000000..177d130e --- /dev/null +++ b/src/dispatch.rs @@ -0,0 +1,724 @@ +//! Turn-boundary message batching dispatcher. +//! +//! See ADR: turn-boundary-batching-adr.md for full design rationale. +//! +//! # Invariants +//! - I1: First message after idle has zero added latency. +//! - I2: At most one in-flight ACP turn per thread. +//! - I3: Broker structural fidelity — no merging, splitting, reordering, or +//! semantic transformation of arrival events. 
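+//
+// Illustrative configuration sketch (not part of the ADR text): batched mode is
+// opted into per platform via the keys added in config.rs. The `[discord]` table
+// name is assumed from the config struct names; the values shown are the defaults
+// documented in this change, not requirements.
+//
+//     [discord]
+//     message_processing_mode = "batched"   # default: "per-message"
+//     max_buffered_messages   = 10          # per-thread channel capacity
+//     max_batch_tokens        = 24000       # soft token cap for greedy drain
+//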
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::Instant;
+
+use anyhow::Result;
+use tracing::{error, info, info_span, warn};
+
+use crate::adapter::{AdapterRouter, ChannelRef, ChatAdapter, MessageRef};
+use crate::acp::ContentBlock;
+use crate::error_display::format_user_error;
+use crate::reactions::StatusReactionController;
+
+// ---------------------------------------------------------------------------
+// Public types
+// ---------------------------------------------------------------------------
+
+/// One arrival event buffered for a future ACP turn.
+pub struct BufferedMessage {
+    /// Serialised SenderContext JSON (already built by the platform adapter).
+    pub sender_json: String,
+    /// User-visible prompt text (verbatim, never transformed).
+    pub prompt: String,
+    /// Attachment blocks (images, STT transcripts) in arrival order.
+    pub extra_blocks: Vec<ContentBlock>,
+    /// Anchor for reactions (👀 / ❌).
+    pub trigger_msg: MessageRef,
+    /// Broker receive time — used for `buffer_wait_ms` observability.
+    pub arrived_at: Instant,
+    /// Rough token estimate for `max_batch_tokens` cap.
+    pub estimated_tokens: usize,
+    /// Snapshot of "is another bot present in this thread" at submit time —
+    /// matches v0.8.2-beta.1's per-message by-value pattern. `dispatch_batch`
+    /// uses the freshest snapshot in the batch (`batch.last()`).
+    pub other_bot_present: bool,
+}
+
+/// Error returned by `Dispatcher::submit`.
+#[derive(Debug)]
+pub enum DispatchError {
+    /// The per-thread consumer task has exited unexpectedly.
+    ConsumerDead,
+}
+
+impl std::fmt::Display for DispatchError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::ConsumerDead => write!(f, "dispatch consumer exited unexpectedly"),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Internal types
+// ---------------------------------------------------------------------------
+
+struct ThreadHandle {
+    tx: tokio::sync::mpsc::Sender<BufferedMessage>,
+    _consumer: tokio::task::JoinHandle<()>,
+    /// Race-safe eviction counter (§2.5). Plain u64 — all reads/writes under per_thread lock.
+    generation: u64,
+    channel_id: String,
+    adapter_kind: String,
+}
+
+impl ThreadHandle {
+    /// Close the sender and drain remaining messages for shutdown logging.
+    fn drain_pending(&mut self) -> usize {
+        // Closing the sender causes the consumer to exit on next recv().
+        // We can't synchronously drain an async channel, so we report the
+        // approximate capacity used via the channel's current length.
+        self.tx.max_capacity() - self.tx.capacity()
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Dispatcher
+// ---------------------------------------------------------------------------
+
+/// Per-thread message dispatcher for batched mode.
+///
+/// Constructed once in `main.rs` and shared via `Arc`. Platform adapters call
+/// `submit()` from their per-message `tokio::spawn`'d tasks.
+pub struct Dispatcher {
+    /// std::sync::Mutex — critical section has no .await; tokio::Mutex buys nothing here.
+    per_thread: Mutex<HashMap<String, ThreadHandle>>,
+    /// Monotonic counter for `ThreadHandle.generation` (§2.5). Pre-fetched on
+    /// every `submit` and consumed only when a fresh handle is inserted; wasted
+    /// values are fine because generations need only be monotonic, not contiguous.
+    next_generation: AtomicU64,
+    router: Arc<AdapterRouter>,
+    max_buffered_messages: usize,
+    max_batch_tokens: usize,
+}
+
+impl Dispatcher {
+    pub fn new(
+        router: Arc<AdapterRouter>,
+        max_buffered_messages: usize,
+        max_batch_tokens: usize,
+    ) -> Self {
+        Self {
+            per_thread: Mutex::new(HashMap::new()),
+            next_generation: AtomicU64::new(0),
+            router,
+            max_buffered_messages,
+            max_batch_tokens,
+        }
+    }
+
+    /// Submit one arrival event for the given thread.
+    ///
+    /// - If the thread has no active consumer, one is spawned lazily.
+    /// - If the channel is full, this future parks until space is available
+    ///   (backpressure — no data loss, no error).
+    /// - If the consumer has died (`SendError`), surfaces ❌ + ⚠️ and returns
+    ///   `Err(DispatchError::ConsumerDead)` (§2.5).
+    ///
+    /// `adapter` is passed per-call (not stored on `Dispatcher`) because the
+    /// Discord adapter is constructed inside serenity's `ready` callback via
+    /// `OnceLock` — after the Dispatcher is built in `main.rs`.
+    pub async fn submit(
+        &self,
+        thread_key: String,
+        thread_channel: ChannelRef,
+        adapter: Arc<dyn ChatAdapter>,
+        msg: BufferedMessage,
+    ) -> Result<(), DispatchError> {
+        let cap = self.max_buffered_messages;
+        let router = Arc::clone(&self.router);
+        let max_tokens = self.max_batch_tokens;
+
+        // Pre-fetch a generation in case we end up inserting a fresh handle.
+        // Wasted if the entry already exists; generations need only be monotonic.
+        let next_g = self.next_generation.fetch_add(1, Ordering::Relaxed);
+
+        let (tx, my_generation) = {
+            let mut map = self.per_thread.lock().unwrap();
+            let entry = map.entry(thread_key.clone()).or_insert_with(|| {
+                let (tx, rx) = tokio::sync::mpsc::channel(cap);
+                let consumer = tokio::spawn(consumer_loop(
+                    thread_key.clone(),
+                    thread_channel.clone(),
+                    rx,
+                    Arc::clone(&router),
+                    Arc::clone(&adapter),
+                    cap,
+                    max_tokens,
+                ));
+                ThreadHandle {
+                    tx,
+                    _consumer: consumer,
+                    generation: next_g,
+                    channel_id: thread_channel.channel_id.clone(),
+                    adapter_kind: adapter.platform().to_string(),
+                }
+            });
+            (entry.tx.clone(), entry.generation)
+        };
+        // Dispatcher mutex released — held only to look up/insert the handle.
+
+        if let Err(e) = tx.send(msg).await {
+            // Consumer has exited — race-safe eviction under lock (§2.5).
+            {
+                let mut map = self.per_thread.lock().unwrap();
+                Self::try_evict_locked(&mut map, &thread_key, my_generation);
+            }
+            let failed_msg = e.0;
+            let _ = adapter
+                .add_reaction(
+                    &failed_msg.trigger_msg,
+                    &crate::config::ReactionsConfig::default().emojis.error,
+                )
+                .await;
+            let _ = adapter
+                .send_message(
+                    &thread_channel,
+                    &format!("⚠️ {}", format_user_error("dispatch consumer exited unexpectedly")),
+                )
+                .await;
+            return Err(DispatchError::ConsumerDead);
+        }
+        Ok(())
+    }
+
+    /// Drop the per-thread handle and abort its consumer, discarding any messages
+    /// buffered at cancel time (§2.5 / §4.4). Returns the approximate number of
+    /// messages that were buffered (mpsc length at removal time).
+    ///
+    /// Disjoint from SendError recovery: removal happens *before* abort, so any
+    /// fresh `submit` after this returns lands on a lazily-constructed new handle
+    /// instead of observing `SendError`.
+    pub fn cancel_buffered(&self, thread_key: &str) -> usize {
+        let mut map = self.per_thread.lock().unwrap();
+        if let Some(handle) = map.remove(thread_key) {
+            let pending = handle.tx.max_capacity() - handle.tx.capacity();
+            handle._consumer.abort();
+            pending
+        } else {
+            0
+        }
+    }
+
+    /// §2.5 race-safe eviction. Caller must hold the `per_thread` mutex.
+    /// Removes the entry only if its generation matches `my_generation` —
+    /// protects against evicting a fresh handle that another `submit` lazily
+    /// inserted between this caller's failed `tx.send` and this call.
+    /// Returns true if the entry was removed.
+    fn try_evict_locked(
+        map: &mut HashMap<String, ThreadHandle>,
+        thread_key: &str,
+        my_generation: u64,
+    ) -> bool {
+        if let Some(handle) = map.get(thread_key) {
+            if handle.generation == my_generation {
+                map.remove(thread_key);
+                return true;
+            }
+        }
+        false
+    }
+
+    /// Log buffered-message counts and drop all handles (called on SIGTERM).
+    pub fn shutdown(&self) {
+        let mut map = self.per_thread.lock().unwrap();
+        for (thread_id, handle) in map.iter_mut() {
+            let pending = handle.drain_pending();
+            if pending > 0 {
+                warn!(
+                    thread_id = %thread_id,
+                    channel = %handle.channel_id,
+                    adapter = %handle.adapter_kind,
+                    buffered_lost = pending,
+                    "shutdown drained pending messages without dispatch",
+                );
+            }
+        }
+        map.clear();
+    }
+}
+
+// ---------------------------------------------------------------------------
+// consumer_loop
+// ---------------------------------------------------------------------------
+
+#[allow(clippy::too_many_arguments)]
+async fn consumer_loop(
+    thread_key: String,
+    thread_channel: ChannelRef,
+    mut rx: tokio::sync::mpsc::Receiver<BufferedMessage>,
+    router: Arc<AdapterRouter>,
+    adapter: Arc<dyn ChatAdapter>,
+    max_batch: usize,
+    max_tokens: usize,
+) {
+    // `pending` holds a message that exceeded the token cap for the current batch;
+    // it becomes the first message of the next batch, preserving FIFO.
+    let mut pending: Option<BufferedMessage> = None;
+
+    loop {
+        // I1: block until at least one message arrives (zero latency for first message).
+        let first = match pending.take() {
+            Some(msg) => msg,
+            None => match rx.recv().await {
+                Some(msg) => msg,
+                None => break, // all senders dropped → cleanup_idle evicted us
+            },
+        };
+
+        // Greedy drain up to max_batch messages or max_tokens.
+        let mut batch = vec![first];
+        let mut cumulative_tokens = batch[0].estimated_tokens;
+
+        while batch.len() < max_batch {
+            match rx.try_recv() {
+                Ok(more) => {
+                    if cumulative_tokens + more.estimated_tokens > max_tokens {
+                        // Token cap — save for next turn (FIFO preserved).
+                        pending = Some(more);
+                        break;
+                    }
+                    cumulative_tokens += more.estimated_tokens;
+                    batch.push(more);
+                }
+                Err(_) => break,
+            }
+        }
+
+        // §2.6 freshness: use the freshest snapshot in the batch — the last
+        // message's `other_bot_present` (captured at submit time, mirrors
+        // v0.8.2-beta.1's per-message by-value pattern). batch is non-empty.
+        let bot_present = batch.last().unwrap().other_bot_present;
+
+        dispatch_batch(
+            &thread_key,
+            &thread_channel,
+            &router,
+            &adapter,
+            batch,
+            bot_present,
+        )
+        .await;
+    }
+    // rx.recv() returned None → all senders dropped → exit cleanly.
+}
+
+// ---------------------------------------------------------------------------
+// dispatch_batch
+// ---------------------------------------------------------------------------
+
+async fn dispatch_batch(
+    thread_key: &str,
+    thread_channel: &ChannelRef,
+    router: &Arc<AdapterRouter>,
+    adapter: &Arc<dyn ChatAdapter>,
+    batch: Vec<BufferedMessage>,
+    other_bot_present: bool,
+) {
+    let dispatch_start = Instant::now();
+    let batch_size = batch.len();
+
+    // Apply 👀 reaction to every message in the batch before dispatch (§6.7).
+    let queued_emoji = crate::config::ReactionsConfig::default().emojis.queued;
+    for msg in &batch {
+        let _ = adapter.add_reaction(&msg.trigger_msg, &queued_emoji).await;
+    }
+
+    // Collect per-event observability data.
+    let tokens_per_event: Vec<usize> = batch.iter().map(|m| m.estimated_tokens).collect();
+    let wait_ms: Vec<u128> = batch
+        .iter()
+        .map(|m| m.arrived_at.elapsed().as_millis())
+        .collect();
+
+    // Pack all arrival events into one Vec (§3.3).
+    let mut content_blocks: Vec<ContentBlock> = Vec::new();
+    for msg in &batch {
+        let mut event_blocks =
+            AdapterRouter::pack_arrival_event(&msg.sender_json, &msg.prompt, msg.extra_blocks.clone());
+        content_blocks.append(&mut event_blocks);
+    }
+    let packed_block_count = content_blocks.len();
+
+    // Ensure session exists.
+    if let Err(e) = router.pool().get_or_create(thread_key).await {
+        let user_msg = format_user_error(&e.to_string());
+        let _ = adapter
+            .send_message(thread_channel, &format!("⚠️ {user_msg}"))
+            .await;
+        error!("pool error in dispatch_batch: {e}");
+        return;
+    }
+
+    // Anchor reactions on the last message in the batch.
+    let trigger_msg = batch.last().unwrap().trigger_msg.clone();
+    let reactions_config = router.reactions_config().clone();
+    let reactions = Arc::new(StatusReactionController::new(
+        reactions_config.enabled,
+        adapter.clone(),
+        trigger_msg,
+        reactions_config.emojis.clone(),
+        reactions_config.timing.clone(),
+    ));
+    // 👀 already applied above; skip set_queued() to avoid double-reaction.
+
+    let result = router
+        .stream_prompt_blocks(
+            adapter,
+            thread_key,
+            content_blocks,
+            thread_channel,
+            reactions.clone(),
+            other_bot_present,
+        )
+        .await;
+
+    match &result {
+        Ok(()) => reactions.set_done().await,
+        Err(_) => reactions.set_error().await,
+    }
+
+    let hold_ms = if result.is_ok() {
+        reactions_config.timing.done_hold_ms
+    } else {
+        reactions_config.timing.error_hold_ms
+    };
+    if reactions_config.remove_after_reply {
+        let reactions = reactions;
+        tokio::spawn(async move {
+            tokio::time::sleep(std::time::Duration::from_millis(hold_ms)).await;
+            reactions.clear().await;
+        });
+    }
+
+    if let Err(ref e) = result {
+        let _ = adapter
+            .send_message(thread_channel, &format!("⚠️ {e}"))
+            .await;
+    }
+
+    let agent_dispatch_ms = dispatch_start.elapsed().as_millis();
+    let span = info_span!(
+        "dispatch",
+        channel = %thread_channel.channel_id,
+        adapter = adapter.platform(),
+    );
+    let _enter = span.enter();
+    info!(
+        thread_key = %thread_key,
+        events_per_dispatch = batch_size,
+        packed_block_count = packed_block_count,
+        agent_dispatch_ms = agent_dispatch_ms,
+        tokens_per_event = ?tokens_per_event,
+        wait_ms = ?wait_ms,
+        "batch dispatched",
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Token estimation
+// ---------------------------------------------------------------------------
+
+/// Rough token estimate for a buffered message (used for `max_batch_tokens` cap).
+/// Intentionally coarse — the goal is a guard rail, not an exact pre-flight.
+pub fn estimate_tokens(prompt: &str, extra_blocks: &[ContentBlock]) -> usize {
+    // ~4 chars per token for text; fixed 512 per image block (conservative).
+    let text_tokens = prompt.len() / 4 + 1;
+    let block_tokens: usize = extra_blocks
+        .iter()
+        .map(|b| match b {
+            ContentBlock::Text { text } => text.len() / 4 + 1,
+            ContentBlock::Image { .. } => 512,
+        })
+        .sum();
+    text_tokens + block_tokens
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn estimate_tokens_empty() {
+        assert!(estimate_tokens("", &[]) >= 1);
+    }
+
+    #[test]
+    fn estimate_tokens_text() {
+        // 400 chars ≈ 100 tokens
+        let s = "a".repeat(400);
+        assert_eq!(estimate_tokens(&s, &[]), 101);
+    }
+
+    #[test]
+    fn estimate_tokens_image_block() {
+        let blocks = vec![ContentBlock::Image {
+            media_type: "image/png".into(),
+            data: "base64data".into(),
+        }];
+        assert_eq!(estimate_tokens("", &blocks), 1 + 512);
+    }
+
+    #[test]
+    fn pack_arrival_event_single() {
+        let blocks = AdapterRouter::pack_arrival_event(
+            r#"{"schema":"openab.sender.v1"}"#,
+            "hello",
+            vec![],
+        );
+        assert_eq!(blocks.len(), 1);
+        if let ContentBlock::Text { text } = &blocks[0] {
+            assert!(text.contains("<sender_context>"));
+            assert!(text.contains("hello"));
+        } else {
+            panic!("expected Text block");
+        }
+    }
+
+    #[test]
+    fn pack_arrival_event_with_extra_blocks() {
+        let extra = vec![
+            ContentBlock::Text { text: "[Voice transcript]: hi".into() },
+            ContentBlock::Image { media_type: "image/png".into(), data: "abc".into() },
+        ];
+        let blocks = AdapterRouter::pack_arrival_event("{}", "prompt", extra);
+        // header + 2 extra = 3 blocks
+        assert_eq!(blocks.len(), 3);
+        // extra blocks follow the header in arrival order
+        assert!(matches!(&blocks[1], ContentBlock::Text { text } if text.contains("Voice transcript")));
+        assert!(matches!(&blocks[2], ContentBlock::Image { .. }));
+    }
+
+    #[test]
+    fn pack_arrival_event_batch_n2() {
+        // Two arrival events concatenated → 2 header blocks
+        let mut all: Vec<ContentBlock> = Vec::new();
+        all.extend(AdapterRouter::pack_arrival_event(r#"{"ts":"T1"}"#, "msg1", vec![]));
+        all.extend(AdapterRouter::pack_arrival_event(r#"{"ts":"T2"}"#, "msg2", vec![]));
+        assert_eq!(all.len(), 2);
+        if let ContentBlock::Text { text } = &all[0] {
+            assert!(text.contains("msg1"));
+        }
+        if let ContentBlock::Text { text } = &all[1] {
+            assert!(text.contains("msg2"));
+        }
+    }
+
+    // ADR §3.6 Scenario B — text in one message, image in the next, same author.
+    // Broker preserves structural truth: image stays in M2 alone, both messages
+    // carry the same sender_id so the agent can semantically link them.
+ #[test] + fn pack_arrival_event_scenario_b_image_in_separate_message() { + let mut all: Vec = Vec::new(); + // M1 (alice): "see this image" + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T1"}"#, + "see this image", + vec![], + )); + // M2 (alice): image, no text + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T2"}"#, + "", + vec![ContentBlock::Image { + media_type: "image/png".into(), + data: "imgB".into(), + }], + )); + // header(M1) + header(M2) + image(M2) = 3 blocks + assert_eq!(all.len(), 3); + // M1's header carries text only + if let ContentBlock::Text { text } = &all[0] { + assert!(text.contains(r#""sender_id":"A""#)); + assert!(text.contains(r#""ts":"T1""#)); + assert!(text.contains("see this image")); + } else { + panic!("expected Text header for M1"); + } + // M2's header carries empty prompt (line after is blank) + if let ContentBlock::Text { text } = &all[1] { + assert!(text.contains(r#""ts":"T2""#)); + assert!(text.ends_with("\n\n"), "M2 prompt must be empty: {text:?}"); + } else { + panic!("expected Text header for M2"); + } + // M2's image follows immediately after its header (structural attribution) + assert!(matches!(&all[2], ContentBlock::Image { .. })); + } + + // ADR §3.6 Scenario C — fragmented multi-author batch. + // Repeated sender_id is preserved across non-adjacent messages; bob's interjection + // is kept as-is (no silent drop, no reorder). + #[test] + fn pack_arrival_event_scenario_c_multi_author_interleaved() { + let mut all: Vec = Vec::new(); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T1"}"#, + "see this image", + vec![], + )); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"B","ts":"T2"}"#, + "what?", + vec![], + )); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T3"}"#, + "", + vec![ContentBlock::Image { + media_type: "image/png".into(), + data: "imgC".into(), + }], + )); + // 3 headers + 1 image = 4 blocks + assert_eq!(all.len(), 4); + // Order is preserved (no reorder). + let h1 = match &all[0] { + ContentBlock::Text { text } => text, + _ => panic!("expected Text"), + }; + let h2 = match &all[1] { + ContentBlock::Text { text } => text, + _ => panic!("expected Text"), + }; + let h3 = match &all[2] { + ContentBlock::Text { text } => text, + _ => panic!("expected Text"), + }; + assert!(h1.contains(r#""sender_id":"A""#) && h1.contains("see this image")); + assert!(h2.contains(r#""sender_id":"B""#) && h2.contains("what?")); + assert!(h3.contains(r#""sender_id":"A""#)); + // M3's image attached to M3 only. + assert!(matches!(&all[3], ContentBlock::Image { .. })); + } + + // ADR §3.6 Scenario D — voice-only message in a batch. + // M2 has empty prompt + transcript text block. Per ADR, transcript moves AFTER + // (vs. v0.8.2-beta.1's prepended position). 
+ #[test] + fn pack_arrival_event_scenario_d_voice_only() { + let mut all: Vec = Vec::new(); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T1"}"#, + "look at this", + vec![ContentBlock::Image { + media_type: "image/png".into(), + data: "scr".into(), + }], + )); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"A","ts":"T2"}"#, + "", + vec![ContentBlock::Text { + text: "[Voice message transcript]: hey can we sync about the deploy".into(), + }], + )); + all.extend(AdapterRouter::pack_arrival_event( + r#"{"sender_id":"B","ts":"T3"}"#, + "what?", + vec![], + )); + // header(M1) + image(M1) + header(M2) + transcript(M2) + header(M3) = 5 + assert_eq!(all.len(), 5); + if let ContentBlock::Text { text } = &all[0] { + assert!(text.contains(r#""ts":"T1""#)); + assert!(text.contains("look at this")); + } + assert!(matches!(&all[1], ContentBlock::Image { .. })); + // M2 header has empty prompt; transcript follows AFTER the header (not before). + if let ContentBlock::Text { text } = &all[2] { + assert!(text.contains(r#""ts":"T2""#)); + assert!(text.ends_with("\n\n")); + } + if let ContentBlock::Text { text } = &all[3] { + assert!(text.contains("Voice message transcript")); + assert!(text.contains("sync about the deploy")); + } else { + panic!("expected transcript Text block as M2 attachment"); + } + if let ContentBlock::Text { text } = &all[4] { + assert!(text.contains(r#""sender_id":"B""#)); + assert!(text.contains("what?")); + } + } + + // Token-cap math: a single message that already exceeds max_batch_tokens still + // dispatches alone (the consumer_loop logic admits the first message before + // checking the cap). Verifies estimate_tokens scales with input length. + #[test] + fn estimate_tokens_oversized_single_message() { + // ~24k token text (96000 chars / 4 chars-per-token). + let big = "x".repeat(96_000); + let est = estimate_tokens(&big, &[]); + assert!(est > 24_000, "expected >24k tokens, got {est}"); + } + + // Cumulative token math: two messages whose sum exceeds max_batch_tokens. + // The consumer_loop reads first, then peeks at the next; if cumulative tokens + // > cap, the second is held over to the next batch (FIFO preserved). + #[test] + fn estimate_tokens_cumulative_exceeds_cap() { + let max_tokens = 24_000_usize; + let m1 = estimate_tokens(&"a".repeat(80_000), &[]); + let m2 = estimate_tokens(&"b".repeat(50_000), &[]); + assert!(m1 < max_tokens); + assert!(m1 + m2 > max_tokens, "{m1} + {m2} should exceed cap"); + } + + // ADR §2.5 race-safe eviction. The full SendError path requires a real + // AdapterRouter (concrete struct, not a trait — no easy mock seam), so we + // unit-test the eviction predicate in isolation. End-to-end consumer-death + // recovery is exercised by the manual staging smoke documented in the ADR. 
+ fn dummy_handle(generation: u64) -> ThreadHandle { + let (tx, _rx) = tokio::sync::mpsc::channel::(1); + let consumer = tokio::spawn(async {}); + ThreadHandle { + tx, + _consumer: consumer, + generation, + channel_id: "C".into(), + adapter_kind: "discord".into(), + } + } + + #[tokio::test] + async fn try_evict_locked_removes_when_generation_matches() { + let mut map: HashMap = HashMap::new(); + map.insert("t".into(), dummy_handle(7)); + assert!(Dispatcher::try_evict_locked(&mut map, "t", 7)); + assert!(map.is_empty()); + } + + // The bug §2.5 prevents: a stale producer (my_gen=7) observing SendError + // must not remove a freshly inserted handle (gen=8) created by another + // submit between the failed send and the eviction attempt. + #[tokio::test] + async fn try_evict_locked_keeps_when_generation_differs() { + let mut map: HashMap = HashMap::new(); + map.insert("t".into(), dummy_handle(8)); + assert!(!Dispatcher::try_evict_locked(&mut map, "t", 7)); + assert_eq!(map.len(), 1); + assert_eq!(map.get("t").unwrap().generation, 8); + } + + #[tokio::test] + async fn try_evict_locked_returns_false_when_absent() { + let mut map: HashMap = HashMap::new(); + assert!(!Dispatcher::try_evict_locked(&mut map, "missing", 0)); + } +} diff --git a/src/gateway.rs b/src/gateway.rs index 816afb45..641c2dfe 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -350,6 +350,8 @@ pub async fn run_gateway_adapter( params: GatewayParams, router: Arc, mut shutdown_rx: tokio::sync::watch::Receiver, + dispatcher: Option>, + message_processing_mode: crate::config::MessageProcessingMode, ) -> Result<()> { let platform: &'static str = Box::leak(params.platform.into_boxed_str()); @@ -487,6 +489,12 @@ pub async fn run_gateway_adapter( channel_id: event.channel.id.clone(), thread_id: event.channel.thread_id.clone(), is_bot: event.sender.is_bot, + // Gateway: use event timestamp if available, else broker receive time + timestamp: if event.timestamp.is_empty() { + chrono_now_iso8601() + } else { + event.timestamp.clone() + }, }; let sender_json = serde_json::to_string(&sender_ctx) .unwrap_or_default(); @@ -499,6 +507,8 @@ pub async fn run_gateway_adapter( let adapter = adapter.clone(); let router = router.clone(); let prompt = event.content.text.clone(); + let dispatcher = dispatcher.clone(); + let mode = message_processing_mode; // Slash command interception for gateway platforms // (Feishu/LINE/Telegram don't have native slash commands) @@ -541,19 +551,52 @@ pub async fn run_gateway_adapter( channel.clone() }; - if let Err(e) = router - .handle_message( - &adapter, - &thread_channel, - &sender_json, - &prompt, - vec![], - &trigger_msg, - false, - ) - .await - { - error!("gateway message handling error: {e}"); + match mode { + crate::config::MessageProcessingMode::PerMessage => { + if let Err(e) = router + .handle_message( + &adapter, + &thread_channel, + &sender_json, + &prompt, + vec![], + &trigger_msg, + false, + ) + .await + { + error!("gateway message handling error: {e}"); + } + } + crate::config::MessageProcessingMode::Batched => { + if let Some(dispatcher) = dispatcher { + let thread_key = format!( + "{}:{}", + thread_channel.platform, + thread_channel.thread_id.as_deref() + .unwrap_or(&thread_channel.channel_id) + ); + let estimated_tokens = + crate::dispatch::estimate_tokens(&prompt, &[]); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks: vec![], + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present: false, + }; + if let Err(e) = 
dispatcher + .submit(thread_key, thread_channel, adapter, buf_msg) + .await + { + error!("gateway dispatcher submit error: {e}"); + } + } else { + error!("gateway batched mode enabled but no dispatcher configured"); + } + } } }); } @@ -592,3 +635,29 @@ pub async fn run_gateway_adapter( backoff_secs = (backoff_secs * 2).min(MAX_BACKOFF); } // outer reconnect loop } + +/// Best-effort ISO 8601 UTC timestamp for the current moment (no external crate). +fn chrono_now_iso8601() -> String { + use std::time::{SystemTime, UNIX_EPOCH}; + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + // Reuse the same days_to_ymd logic inline + let days = secs / 86400; + let time_secs = secs % 86400; + let h = time_secs / 3600; + let m = (time_secs % 3600) / 60; + let s = time_secs % 60; + let z = days + 719468; + let era = z / 146097; + let doe = z % 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let mo = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if mo <= 2 { y + 1 } else { y }; + format!("{y:04}-{mo:02}-{d:02}T{h:02}:{m:02}:{s:02}.000Z") +} diff --git a/src/main.rs b/src/main.rs index ffe6985c..ccfd6721 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod bot_turns; mod config; mod cron; mod discord; +mod dispatch; mod error_display; mod format; mod markdown; @@ -142,6 +143,9 @@ async fn main() -> anyhow::Result<()> { // Shutdown signal for Slack adapter let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false); + // Dispatcher handles tracked here so SIGTERM cleanup can call shutdown() on each (ADR §6.8). + let mut dispatchers: Vec> = Vec::new(); + // Spawn cleanup task let cleanup_pool = pool.clone(); let cleanup_handle = tokio::spawn(async move { @@ -188,6 +192,18 @@ async fn main() -> anyhow::Result<()> { let max_bot_turns = slack_cfg.max_bot_turns; let slack_shutdown_rx = shutdown_rx.clone(); let adapter = shared_slack_adapter.clone().expect("shared_slack_adapter must exist when slack config is present"); + let slack_mode = slack_cfg.message_processing_mode; + let slack_dispatcher = if slack_mode == config::MessageProcessingMode::Batched { + let d = Arc::new(dispatch::Dispatcher::new( + router.clone(), + slack_cfg.max_buffered_messages, + slack_cfg.max_batch_tokens, + )); + dispatchers.push(d.clone()); + Some(d) + } else { + None + }; Some(tokio::spawn(async move { if let Err(e) = slack::run_slack_adapter( adapter, @@ -203,6 +219,8 @@ async fn main() -> anyhow::Result<()> { stt, router, slack_shutdown_rx, + slack_dispatcher, + slack_mode, ) .await { @@ -218,6 +236,18 @@ async fn main() -> anyhow::Result<()> { let router = router.clone(); let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); + let gw_mode = gw_cfg.message_processing_mode; + let gw_dispatcher = if gw_mode == config::MessageProcessingMode::Batched { + let d = Arc::new(dispatch::Dispatcher::new( + router.clone(), + gw_cfg.max_buffered_messages, + gw_cfg.max_batch_tokens, + )); + dispatchers.push(d.clone()); + Some(d) + } else { + None + }; let params = gateway::GatewayParams { url: gw_cfg.url, platform: gw_cfg.platform, @@ -230,7 +260,7 @@ async fn main() -> anyhow::Result<()> { streaming: gw_cfg.streaming, }; Some(tokio::spawn(async move { - if let Err(e) = gateway::run_gateway_adapter(params, router, shutdown_rx).await { + if let Err(e) = 
gateway::run_gateway_adapter(params, router, shutdown_rx, gw_dispatcher, gw_mode).await { error!("gateway adapter error: {e}"); } })) @@ -301,6 +331,19 @@ async fn main() -> anyhow::Result<()> { "starting discord adapter" ); + let discord_mode = discord_cfg.message_processing_mode; + let discord_dispatcher = if discord_mode == config::MessageProcessingMode::Batched { + let d = Arc::new(dispatch::Dispatcher::new( + router.clone(), + discord_cfg.max_buffered_messages, + discord_cfg.max_batch_tokens, + )); + dispatchers.push(d.clone()); + Some(d) + } else { + None + }; + let handler = discord::Handler { router, allow_all_channels, @@ -318,6 +361,8 @@ async fn main() -> anyhow::Result<()> { max_bot_turns: discord_cfg.max_bot_turns, bot_turns: tokio::sync::Mutex::new(bot_turns::BotTurnTracker::new(discord_cfg.max_bot_turns)), allow_dm: discord_cfg.allow_dm, + dispatcher: discord_dispatcher, + message_processing_mode: discord_mode, }; let intents = GatewayIntents::GUILD_MESSAGES @@ -378,6 +423,10 @@ async fn main() -> anyhow::Result<()> { // cron.rs drains in-flight tasks for up to 30s, so wait slightly longer let _ = tokio::time::timeout(std::time::Duration::from_secs(35), handle).await; } + // Drain per-thread dispatchers and log buffered_lost counts before pool shutdown (ADR §6.8). + for d in &dispatchers { + d.shutdown(); + } let shutdown_pool = pool; shutdown_pool.shutdown().await; info!("openab shut down"); diff --git a/src/slack.rs b/src/slack.rs index 7619255f..fdc4265b 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -428,50 +428,6 @@ impl ChatAdapter for SlackAdapter { } } -// --- Per-thread async queue (inspired by OpenClaw's KeyedAsyncQueue) --- - -/// Serialize async work per key while allowing unrelated keys to run concurrently. -/// Same-key tasks execute in FIFO order; different keys run in parallel. -/// Idle keys are cleaned up automatically after the last task settles. -struct KeyedAsyncQueue { - tails: tokio::sync::Mutex>>, -} - -impl KeyedAsyncQueue { - fn new() -> Self { - Self { - tails: tokio::sync::Mutex::new(HashMap::new()), - } - } - - /// Acquire a per-key permit. The returned guard must be held for the - /// duration of the async work. Dropping it allows the next queued task - /// for the same key to proceed. - /// - /// Performs lazy cleanup of idle semaphores to prevent unbounded growth - /// in long-running deployments. - async fn acquire(&self, key: &str) -> Option { - let sem = { - let mut tails = self.tails.lock().await; - // Lazy cleanup: evict idle entries (available_permits == 1 means no one is holding or waiting) - if tails.len() > 100 { - tails.retain(|_, sem| Arc::strong_count(sem) > 1 || sem.available_permits() < 1); - } - tails - .entry(key.to_string()) - .or_insert_with(|| Arc::new(tokio::sync::Semaphore::new(1))) - .clone() - }; - match sem.acquire_owned().await { - Ok(permit) => Some(permit), - Err(e) => { - warn!(key, error = %e, "semaphore closed, skipping message"); - None - } - } - } -} - // --- Socket Mode event loop --- /// Hard cap on consecutive bot messages in a thread. Prevents runaway loops. 
@@ -494,8 +450,9 @@ pub async fn run_slack_adapter( stt_config: SttConfig, router: Arc, mut shutdown_rx: watch::Receiver, + dispatcher: Option>, + message_processing_mode: crate::config::MessageProcessingMode, ) -> Result<()> { - let queue = Arc::new(KeyedAsyncQueue::new()); let bot_token = adapter.bot_token().to_string(); let bot_turns = Arc::new(tokio::sync::Mutex::new(BotTurnTracker::new(max_bot_turns))); @@ -590,18 +547,9 @@ pub async fn run_slack_adapter( let allowed_users = allowed_users.clone(); let stt_config = stt_config.clone(); let router = router.clone(); - let queue = queue.clone(); - // Queue key: thread_ts if already in a thread, otherwise ts. - // app_mention always has a channel context, so ts alone - // is unique enough (unlike message events in DMs where - // we prefix with channel_id to avoid ts collisions). - let queue_key = event["thread_ts"] - .as_str() - .or_else(|| event["ts"].as_str()) - .unwrap_or("") - .to_string(); + let dispatcher = dispatcher.clone(); + let mode = message_processing_mode; tokio::spawn(async move { - let Some(_permit) = queue.acquire(&queue_key).await else { return }; handle_message( &event, &adapter, @@ -612,6 +560,8 @@ pub async fn run_slack_adapter( &allowed_users, &stt_config, &router, + dispatcher.as_ref(), + mode, ) .await; }); @@ -665,8 +615,7 @@ pub async fn run_slack_adapter( // --- Bot turn tracking --- // Runs before self-check so ALL bot messages (including own) // count toward the per-thread limit. Matches Discord #483. - // Keyed on thread_ts when in a thread, else channel:ts (the - // same key shape used for per-thread queueing below). + // Keyed on thread_ts when in a thread, else channel:ts. // Non-thread messages get a unique key per message, so the // counter never accumulates — intentional, because bot-to-bot // loops only happen inside threads. @@ -821,7 +770,9 @@ pub async fn run_slack_adapter( } } - // Dispatch to handle_message (serialized per thread) + // Dispatch to handle_message (per-thread serialization comes + // from Dispatcher consumer task in batched mode and from + // pool.with_connection in per-message mode). let event = event.clone(); let adapter = adapter.clone(); let bot_token = bot_token.clone(); @@ -829,19 +780,9 @@ pub async fn run_slack_adapter( let allowed_users = allowed_users.clone(); let stt_config = stt_config.clone(); let router = router.clone(); - let queue = queue.clone(); - // Queue key: thread_ts if in a thread, otherwise channel:ts. - // Prefixed with channel_id for non-thread messages because - // DMs and channels can have overlapping ts values — the - // prefix ensures keys are globally unique. 
- let queue_key = event["thread_ts"] - .as_str() - .map(|s| s.to_string()) - .unwrap_or_else(|| { - format!("{}:{}", channel_id, event["ts"].as_str().unwrap_or("")) - }); + let dispatcher = dispatcher.clone(); + let mode = message_processing_mode; tokio::spawn(async move { - let Some(_permit) = queue.acquire(&queue_key).await else { return }; handle_message( &event, &adapter, @@ -852,6 +793,8 @@ pub async fn run_slack_adapter( &allowed_users, &stt_config, &router, + dispatcher.as_ref(), + mode, ) .await; }); @@ -923,6 +866,8 @@ async fn handle_message( allowed_users: &HashSet, stt_config: &SttConfig, router: &Arc, + dispatcher: Option<&Arc>, + message_processing_mode: crate::config::MessageProcessingMode, ) { let channel_id = match event["channel"].as_str() { Some(ch) => ch.to_string(), @@ -1093,6 +1038,7 @@ async fn handle_message( channel_id: channel_id.clone(), thread_id: thread_ts.clone(), is_bot: is_bot_msg, + timestamp: slack_ts_to_iso8601(&ts), }; let trigger_msg = MessageRef { @@ -1133,11 +1079,41 @@ async fn handle_message( thread_channel.thread_id.as_deref() .is_some_and(|ts| cache.get(ts).is_some_and(|inst| inst.elapsed() < adapter.session_ttl)) }; - if let Err(e) = router - .handle_message(&adapter_dyn, &thread_channel, &sender_json, &prompt, extra_blocks, &trigger_msg, other_bot_present) - .await - { - error!("Slack handle_message error: {e}"); + match message_processing_mode { + crate::config::MessageProcessingMode::PerMessage => { + if let Err(e) = router + .handle_message(&adapter_dyn, &thread_channel, &sender_json, &prompt, extra_blocks, &trigger_msg, other_bot_present) + .await + { + error!("Slack handle_message error: {e}"); + } + } + crate::config::MessageProcessingMode::Batched => { + if let Some(dispatcher) = dispatcher { + let thread_key = format!( + "slack:{}", + thread_channel.thread_id.as_deref().unwrap_or(&thread_channel.channel_id) + ); + let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks, + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present, + }; + if let Err(e) = dispatcher + .submit(thread_key, thread_channel, adapter_dyn, buf_msg) + .await + { + error!("Slack dispatcher submit error: {e}"); + } + } else { + error!("Slack batched mode enabled but no dispatcher configured"); + } + } } } @@ -1162,6 +1138,45 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } +/// Convert a Slack epoch-seconds timestamp (e.g. "1714204397.123456") to ISO 8601 UTC. +/// Returns a best-effort string; falls back to the raw ts on parse failure. +fn slack_ts_to_iso8601(ts: &str) -> String { + // Slack ts format: "." 
+ let mut parts = ts.splitn(2, '.'); + let secs = parts.next().unwrap_or("0").parse::().unwrap_or(0); + // Take first 3 digits of fractional part as milliseconds + let frac = parts.next().unwrap_or("000"); + let ms: u64 = frac.chars().take(3).collect::().parse().unwrap_or(0); + + // Manual ISO 8601 from unix timestamp (no external crate needed) + // Days since epoch → year/month/day + let total_secs = secs; + let days = total_secs / 86400; + let time_secs = total_secs % 86400; + let h = time_secs / 3600; + let m = (time_secs % 3600) / 60; + let s = time_secs % 60; + + // Gregorian calendar calculation + let (year, month, day) = days_to_ymd(days); + format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{ms:03}Z") +} + +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + // Algorithm from https://howardhinnant.github.io/date_algorithms.html + let z = days + 719468; + let era = z / 146097; + let doe = z % 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} + /// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see /// the bare media type. Slack occasionally sends mimetypes like /// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. From 6b6ccf9604aa5848754c2f36824aad82dd97d484 Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 3 May 2026 06:29:19 +0000 Subject: [PATCH 02/32] refactor(dispatch): cleanup naming, parallelize queued reactions, use configured emoji on SendError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename ThreadHandle._consumer → consumer (we actually .abort() it on cancel) - Replace ThreadHandle::drain_pending(&mut self) with pending_count(&self) — read-only signature, name no longer implies side effects - Parallelize 👀 reactions in dispatch_batch via futures::join_all instead of serial loop — first-token latency no longer scales with batch size - SendError ❌ reaction now uses router.reactions_config() instead of ReactionsConfig::default() — respects user-configured emoji - shutdown() switches to iter() (no longer needs &mut after the rename above) - Tighten doc comments - Cargo.lock: sync to openab 0.8.2 (Cargo.toml already at 0.8.2) --- src/dispatch.rs | 53 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index 177d130e..18ff5da4 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -39,9 +39,8 @@ pub struct BufferedMessage { pub arrived_at: Instant, /// Rough token estimate for `max_batch_tokens` cap. pub estimated_tokens: usize, - /// Snapshot of "is another bot present in this thread" at submit time — - /// matches v0.8.2-beta.1's per-message by-value pattern. `dispatch_batch` - /// uses the freshest snapshot in the batch (`batch.last()`). + /// Snapshot at submit time. Captured per-message so a batch reflects the + /// freshest known state; `dispatch_batch` reads `batch.last()`. pub other_bot_present: bool, } @@ -66,7 +65,7 @@ impl std::fmt::Display for DispatchError { struct ThreadHandle { tx: tokio::sync::mpsc::Sender, - _consumer: tokio::task::JoinHandle<()>, + consumer: tokio::task::JoinHandle<()>, /// Race-safe eviction counter (§2.5). Plain u64 — all reads/writes under per_thread lock. 
generation: u64, channel_id: String, @@ -74,11 +73,9 @@ struct ThreadHandle { } impl ThreadHandle { - /// Close the sender and drain remaining messages for shutdown logging. - fn drain_pending(&mut self) -> usize { - // Closing the sender causes the consumer to exit on next recv(). - // We can't synchronously drain an async channel, so we report the - // approximate capacity used via the channel's current length. + /// Approximate number of messages still buffered in the mpsc — used for + /// shutdown / cancel logging. Not exact: tokio's mpsc has no sync `.len()`. + fn pending_count(&self) -> usize { self.tx.max_capacity() - self.tx.capacity() } } @@ -159,7 +156,7 @@ impl Dispatcher { )); ThreadHandle { tx, - _consumer: consumer, + consumer, generation: next_g, channel_id: thread_channel.channel_id.clone(), adapter_kind: adapter.platform().to_string(), @@ -167,7 +164,6 @@ impl Dispatcher { }); (entry.tx.clone(), entry.generation) }; - // Dispatcher mutex released — held only to look up/insert the handle. if let Err(e) = tx.send(msg).await { // Consumer has exited — race-safe eviction under lock (§2.5). @@ -179,7 +175,7 @@ impl Dispatcher { let _ = adapter .add_reaction( &failed_msg.trigger_msg, - &crate::config::ReactionsConfig::default().emojis.error, + &self.router.reactions_config().emojis.error, ) .await; let _ = adapter @@ -203,8 +199,8 @@ impl Dispatcher { pub fn cancel_buffered(&self, thread_key: &str) -> usize { let mut map = self.per_thread.lock().unwrap(); if let Some(handle) = map.remove(thread_key) { - let pending = handle.tx.max_capacity() - handle.tx.capacity(); - handle._consumer.abort(); + let pending = handle.pending_count(); + handle.consumer.abort(); pending } else { 0 @@ -233,15 +229,15 @@ impl Dispatcher { /// Log buffered-message counts and drop all handles (called on SIGTERM). pub fn shutdown(&self) { let mut map = self.per_thread.lock().unwrap(); - for (thread_id, handle) in map.iter_mut() { - let pending = handle.drain_pending(); + for (thread_id, handle) in map.iter() { + let pending = handle.pending_count(); if pending > 0 { warn!( thread_id = %thread_id, channel = %handle.channel_id, adapter = %handle.adapter_kind, buffered_lost = pending, - "shutdown drained pending messages without dispatch", + "shutdown dropped pending messages without dispatch", ); } } @@ -273,7 +269,9 @@ async fn consumer_loop( Some(msg) => msg, None => match rx.recv().await { Some(msg) => msg, - None => break, // all senders dropped → cleanup_idle evicted us + // All senders dropped → either shutdown() cleared the map, or + // cancel_buffered() removed our entry. Exit the loop. + None => break, }, }; @@ -296,9 +294,7 @@ async fn consumer_loop( } } - // §2.6 freshness: use the freshest snapshot in the batch — the last - // message's `other_bot_present` (captured at submit time, mirrors - // v0.8.2-beta.1's per-message by-value pattern). batch is non-empty. + // §2.6: read the freshest snapshot in the batch (batch is non-empty). let bot_present = batch.last().unwrap().other_bot_present; dispatch_batch( @@ -311,7 +307,6 @@ async fn consumer_loop( ) .await; } - // rx.recv() returned None → all senders dropped → exit cleanly. } // --------------------------------------------------------------------------- @@ -330,10 +325,14 @@ async fn dispatch_batch( let batch_size = batch.len(); // Apply 👀 reaction to every message in the batch before dispatch (§6.7). 
- let queued_emoji = crate::config::ReactionsConfig::default().emojis.queued; - for msg in &batch { - let _ = adapter.add_reaction(&msg.trigger_msg, &queued_emoji).await; - } + // Parallelized so first-token latency isn't paid for N serial reaction RPCs. + let queued_emoji = &router.reactions_config().emojis.queued; + futures_util::future::join_all( + batch + .iter() + .map(|msg| adapter.add_reaction(&msg.trigger_msg, queued_emoji)), + ) + .await; // Collect per-event observability data. let tokens_per_event: Vec = batch.iter().map(|m| m.estimated_tokens).collect(); @@ -689,7 +688,7 @@ mod tests { let consumer = tokio::spawn(async {}); ThreadHandle { tx, - _consumer: consumer, + consumer, generation, channel_id: "C".into(), adapter_kind: "discord".into(), From 37f95e4390523359eb4b452bf89de5843a61730a Mon Sep 17 00:00:00 2001 From: Brett Chien Date: Sun, 3 May 2026 06:29:31 +0000 Subject: [PATCH 03/32] feat(discord): add /cancel-all slash command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the standalone /cancel-all path from ADR §4.4 turn-boundary batching. Unlike /reset, /cancel-all is non-destructive to the session. - /cancel-all: dispatcher.cancel_buffered() + pool.cancel_session() → drops buffered messages + aborts in-flight ACP turn, keeps session - /reset: unchanged (still drops buffered + cancels in-flight + tears down session); doc comment updated to reflect that /reset is a superset of /cancel-all rather than "/reset includes /cancel-all" Discord-only — Slack adapter explicitly drops slash_commands envelopes (no thread routing on channel-level delivery), Gateway has no user-facing slash command surface. Response messages cover all four (cancel_session result × dropped count) cases. --- src/discord.rs | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index b464b5ad..535361d1 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -701,6 +701,8 @@ impl EventHandler for Handler { .description("Select the agent mode for this session"), CreateCommand::new("cancel") .description("Cancel the current operation"), + CreateCommand::new("cancel-all") + .description("Cancel current operation and drop all buffered messages"), CreateCommand::new("reset") .description("Reset the conversation session"), ]; @@ -737,6 +739,9 @@ impl EventHandler for Handler { Interaction::Command(cmd) if cmd.data.name == "cancel" => { self.handle_cancel_command(&ctx, &cmd).await; } + Interaction::Command(cmd) if cmd.data.name == "cancel-all" => { + self.handle_cancel_all_command(&ctx, &cmd).await; + } Interaction::Command(cmd) if cmd.data.name == "reset" => { self.handle_reset_command(&ctx, &cmd).await; } @@ -899,6 +904,36 @@ impl Handler { } } + async fn handle_cancel_all_command( + &self, + ctx: &Context, + cmd: &serenity::model::application::CommandInteraction, + ) { + let thread_key = format!("discord:{}", cmd.channel_id.get()); + + let dropped = self + .dispatcher + .as_ref() + .map(|d| d.cancel_buffered(&thread_key)) + .unwrap_or(0); + + let cancel_result = self.router.pool().cancel_session(&thread_key).await; + + let msg = match (cancel_result, dropped) { + (Ok(()), 0) => "🛑 Cancel signal sent.".to_string(), + (Ok(()), n) => format!("🛑 Cancel signal sent. Dropped {n} buffered message(s)."), + (Err(_), 0) => "⚠️ Nothing to cancel — no active session and no buffered messages.".to_string(), + (Err(_), n) => format!("🛑 Dropped {n} buffered message(s). 
No active session to cancel."), + }; + + let response = CreateInteractionResponse::Message( + CreateInteractionResponseMessage::new().content(msg).ephemeral(true), + ); + if let Err(e) = cmd.create_response(&ctx.http, response).await { + tracing::error!(error = %e, "failed to respond to /cancel-all command"); + } + } + async fn handle_reset_command( &self, ctx: &Context, @@ -906,8 +941,7 @@ impl Handler { ) { let thread_key = format!("discord:{}", cmd.channel_id.get()); - // Drop any messages buffered for this thread before resetting the session. - // /reset semantics include /cancel-all: discard buffered work, then reset. + // /reset is a superset of /cancel-all: drop buffered work, then tear down the session. let dropped = self .dispatcher .as_ref() From 810e26d0095f7bb30a95cccd47ee565a41115764 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Sun, 3 May 2026 10:41:49 +0000 Subject: [PATCH 04/32] refactor: unify PerMessage and Batched modes through Dispatcher Both modes now serialize through the per-thread Dispatcher consumer task. PerMessage = max_buffered_messages=1 (each message dispatches alone, FIFO). Batched = configured cap (greedy drain up to max_batch_tokens). Removes the bifurcated match in Slack/Discord/Gateway hot paths, eliminates the Option> indirection, and addresses chaodu-agent PR #686 review concern about PerMessage FIFO regression after the KeyedAsyncQueue removal. --- src/discord.rs | 77 ++++++++++++++------------------------------------ src/gateway.rs | 76 ++++++++++++++++--------------------------------- src/main.rs | 67 ++++++++++++++++++++----------------------- src/slack.rs | 76 ++++++++++++++++--------------------------------- 4 files changed, 99 insertions(+), 197 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index 535361d1..358e8456 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -160,9 +160,8 @@ pub struct Handler { pub bot_turns: tokio::sync::Mutex, /// Allow the bot to respond to Discord DMs. pub allow_dm: bool, - /// Batched-mode dispatcher (None in per-message mode). - pub dispatcher: Option>, - pub message_processing_mode: crate::config::MessageProcessingMode, + /// Per-thread dispatcher (PerMessage mode uses cap=1 for FIFO; Batched uses configured cap). 
+ pub dispatcher: Arc, } impl Handler { @@ -639,53 +638,27 @@ impl EventHandler for Handler { sender.thread_id = Some(thread_channel.channel_id.clone()); } - let router = self.router.clone(); - let mode = self.message_processing_mode; let dispatcher = self.dispatcher.clone(); tokio::spawn(async move { let sender_json = serde_json::to_string(&sender).unwrap(); - match mode { - crate::config::MessageProcessingMode::PerMessage => { - if let Err(e) = router - .handle_message( - &adapter, - &thread_channel, - &sender_json, - &prompt, - extra_blocks, - &trigger_msg, - other_bot_present_flag, - ) - .await - { - error!("handle_message error: {e}"); - } - } - crate::config::MessageProcessingMode::Batched => { - if let Some(dispatcher) = dispatcher { - let thread_key = format!("discord:{}", thread_channel.channel_id); - let estimated_tokens = - crate::dispatch::estimate_tokens(&prompt, &extra_blocks); - let buf_msg = crate::dispatch::BufferedMessage { - sender_json, - prompt, - extra_blocks, - trigger_msg, - arrived_at: std::time::Instant::now(), - estimated_tokens, - other_bot_present: other_bot_present_flag, - }; - if let Err(e) = dispatcher - .submit(thread_key, thread_channel, adapter, buf_msg) - .await - { - error!("dispatcher submit error: {e}"); - } - } else { - error!("batched mode enabled but no dispatcher configured"); - } - } + let thread_key = format!("discord:{}", thread_channel.channel_id); + let estimated_tokens = + crate::dispatch::estimate_tokens(&prompt, &extra_blocks); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks, + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present: other_bot_present_flag, + }; + if let Err(e) = dispatcher + .submit(thread_key, thread_channel, adapter, buf_msg) + .await + { + error!("dispatcher submit error: {e}"); } }); } @@ -911,11 +884,7 @@ impl Handler { ) { let thread_key = format!("discord:{}", cmd.channel_id.get()); - let dropped = self - .dispatcher - .as_ref() - .map(|d| d.cancel_buffered(&thread_key)) - .unwrap_or(0); + let dropped = self.dispatcher.cancel_buffered(&thread_key); let cancel_result = self.router.pool().cancel_session(&thread_key).await; @@ -942,11 +911,7 @@ impl Handler { let thread_key = format!("discord:{}", cmd.channel_id.get()); // /reset is a superset of /cancel-all: drop buffered work, then tear down the session. 
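
Aside: the submit path above calls crate::dispatch::estimate_tokens, whose body is not shown in this series. A rough stand-in (purely an assumption, not the project's implementation) only needs to be cheap and monotonic, since max_batch_tokens is a soft cap:

fn estimate_tokens_sketch(prompt: &str, extra_block_count: usize) -> usize {
    // ~4 characters per token is a common rough heuristic for English text.
    let text_tokens = prompt.chars().count() / 4 + 1;
    // Flat per-attachment charge (images / transcripts); the exact figure is a guess.
    text_tokens + extra_block_count * 512
}
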
- let dropped = self - .dispatcher - .as_ref() - .map(|d| d.cancel_buffered(&thread_key)) - .unwrap_or(0); + let dropped = self.dispatcher.cancel_buffered(&thread_key); let result = self.router.pool().reset_session(&thread_key).await; diff --git a/src/gateway.rs b/src/gateway.rs index 641c2dfe..c2c5f41f 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -1,4 +1,4 @@ -use crate::adapter::{AdapterRouter, ChannelRef, ChatAdapter, MessageRef, SenderContext}; +use crate::adapter::{ChannelRef, ChatAdapter, MessageRef, SenderContext}; use anyhow::Result; use async_trait::async_trait; use futures_util::{SinkExt, StreamExt}; @@ -348,10 +348,8 @@ pub struct GatewayParams { pub async fn run_gateway_adapter( params: GatewayParams, - router: Arc, mut shutdown_rx: tokio::sync::watch::Receiver, - dispatcher: Option>, - message_processing_mode: crate::config::MessageProcessingMode, + dispatcher: Arc, ) -> Result<()> { let platform: &'static str = Box::leak(params.platform.into_boxed_str()); @@ -505,10 +503,8 @@ pub async fn run_gateway_adapter( }; let adapter = adapter.clone(); - let router = router.clone(); let prompt = event.content.text.clone(); let dispatcher = dispatcher.clone(); - let mode = message_processing_mode; // Slash command interception for gateway platforms // (Feishu/LINE/Telegram don't have native slash commands) @@ -551,52 +547,28 @@ pub async fn run_gateway_adapter( channel.clone() }; - match mode { - crate::config::MessageProcessingMode::PerMessage => { - if let Err(e) = router - .handle_message( - &adapter, - &thread_channel, - &sender_json, - &prompt, - vec![], - &trigger_msg, - false, - ) - .await - { - error!("gateway message handling error: {e}"); - } - } - crate::config::MessageProcessingMode::Batched => { - if let Some(dispatcher) = dispatcher { - let thread_key = format!( - "{}:{}", - thread_channel.platform, - thread_channel.thread_id.as_deref() - .unwrap_or(&thread_channel.channel_id) - ); - let estimated_tokens = - crate::dispatch::estimate_tokens(&prompt, &[]); - let buf_msg = crate::dispatch::BufferedMessage { - sender_json, - prompt, - extra_blocks: vec![], - trigger_msg, - arrived_at: std::time::Instant::now(), - estimated_tokens, - other_bot_present: false, - }; - if let Err(e) = dispatcher - .submit(thread_key, thread_channel, adapter, buf_msg) - .await - { - error!("gateway dispatcher submit error: {e}"); - } - } else { - error!("gateway batched mode enabled but no dispatcher configured"); - } - } + let thread_key = format!( + "{}:{}", + thread_channel.platform, + thread_channel.thread_id.as_deref() + .unwrap_or(&thread_channel.channel_id) + ); + let estimated_tokens = + crate::dispatch::estimate_tokens(&prompt, &[]); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks: vec![], + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present: false, + }; + if let Err(e) = dispatcher + .submit(thread_key, thread_channel, adapter, buf_msg) + .await + { + error!("gateway dispatcher submit error: {e}"); } }); } diff --git a/src/main.rs b/src/main.rs index ccfd6721..d5438a0c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -192,18 +192,18 @@ async fn main() -> anyhow::Result<()> { let max_bot_turns = slack_cfg.max_bot_turns; let slack_shutdown_rx = shutdown_rx.clone(); let adapter = shared_slack_adapter.clone().expect("shared_slack_adapter must exist when slack config is present"); - let slack_mode = slack_cfg.message_processing_mode; - let slack_dispatcher = if slack_mode == 
config::MessageProcessingMode::Batched { - let d = Arc::new(dispatch::Dispatcher::new( - router.clone(), - slack_cfg.max_buffered_messages, - slack_cfg.max_batch_tokens, - )); - dispatchers.push(d.clone()); - Some(d) - } else { - None + // Dispatcher is the sole serialization path for both modes. PerMessage = + // cap 1 (each message dispatches alone, FIFO), Batched = configured cap. + let slack_cap = match slack_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => 1, + config::MessageProcessingMode::Batched => slack_cfg.max_buffered_messages, }; + let slack_dispatcher = Arc::new(dispatch::Dispatcher::new( + router.clone(), + slack_cap, + slack_cfg.max_batch_tokens, + )); + dispatchers.push(slack_dispatcher.clone()); Some(tokio::spawn(async move { if let Err(e) = slack::run_slack_adapter( adapter, @@ -217,10 +217,8 @@ async fn main() -> anyhow::Result<()> { slack_cfg.allow_user_messages, max_bot_turns, stt, - router, slack_shutdown_rx, slack_dispatcher, - slack_mode, ) .await { @@ -236,18 +234,16 @@ async fn main() -> anyhow::Result<()> { let router = router.clone(); let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); - let gw_mode = gw_cfg.message_processing_mode; - let gw_dispatcher = if gw_mode == config::MessageProcessingMode::Batched { - let d = Arc::new(dispatch::Dispatcher::new( - router.clone(), - gw_cfg.max_buffered_messages, - gw_cfg.max_batch_tokens, - )); - dispatchers.push(d.clone()); - Some(d) - } else { - None + let gw_cap = match gw_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => 1, + config::MessageProcessingMode::Batched => gw_cfg.max_buffered_messages, }; + let gw_dispatcher = Arc::new(dispatch::Dispatcher::new( + router.clone(), + gw_cap, + gw_cfg.max_batch_tokens, + )); + dispatchers.push(gw_dispatcher.clone()); let params = gateway::GatewayParams { url: gw_cfg.url, platform: gw_cfg.platform, @@ -260,7 +256,7 @@ async fn main() -> anyhow::Result<()> { streaming: gw_cfg.streaming, }; Some(tokio::spawn(async move { - if let Err(e) = gateway::run_gateway_adapter(params, router, shutdown_rx, gw_dispatcher, gw_mode).await { + if let Err(e) = gateway::run_gateway_adapter(params, shutdown_rx, gw_dispatcher).await { error!("gateway adapter error: {e}"); } })) @@ -331,18 +327,16 @@ async fn main() -> anyhow::Result<()> { "starting discord adapter" ); - let discord_mode = discord_cfg.message_processing_mode; - let discord_dispatcher = if discord_mode == config::MessageProcessingMode::Batched { - let d = Arc::new(dispatch::Dispatcher::new( - router.clone(), - discord_cfg.max_buffered_messages, - discord_cfg.max_batch_tokens, - )); - dispatchers.push(d.clone()); - Some(d) - } else { - None + let discord_cap = match discord_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => 1, + config::MessageProcessingMode::Batched => discord_cfg.max_buffered_messages, }; + let discord_dispatcher = Arc::new(dispatch::Dispatcher::new( + router.clone(), + discord_cap, + discord_cfg.max_batch_tokens, + )); + dispatchers.push(discord_dispatcher.clone()); let handler = discord::Handler { router, @@ -362,7 +356,6 @@ async fn main() -> anyhow::Result<()> { bot_turns: tokio::sync::Mutex::new(bot_turns::BotTurnTracker::new(discord_cfg.max_bot_turns)), allow_dm: discord_cfg.allow_dm, dispatcher: discord_dispatcher, - message_processing_mode: discord_mode, }; let intents = GatewayIntents::GUILD_MESSAGES diff --git a/src/slack.rs b/src/slack.rs index fdc4265b..b87cc940 100644 --- 
a/src/slack.rs +++ b/src/slack.rs @@ -1,5 +1,5 @@ use crate::acp::ContentBlock; -use crate::adapter::{AdapterRouter, ChatAdapter, ChannelRef, MessageRef, SenderContext}; +use crate::adapter::{ChatAdapter, ChannelRef, MessageRef, SenderContext}; use crate::bot_turns::{BotTurnTracker, TurnAction, TurnSeverity}; use crate::config::{AllowBots, AllowUsers, SttConfig}; use crate::media; @@ -448,10 +448,8 @@ pub async fn run_slack_adapter( allow_user_messages: AllowUsers, max_bot_turns: u32, stt_config: SttConfig, - router: Arc, mut shutdown_rx: watch::Receiver, - dispatcher: Option>, - message_processing_mode: crate::config::MessageProcessingMode, + dispatcher: Arc, ) -> Result<()> { let bot_token = adapter.bot_token().to_string(); let bot_turns = Arc::new(tokio::sync::Mutex::new(BotTurnTracker::new(max_bot_turns))); @@ -546,9 +544,7 @@ pub async fn run_slack_adapter( let allowed_channels = allowed_channels.clone(); let allowed_users = allowed_users.clone(); let stt_config = stt_config.clone(); - let router = router.clone(); let dispatcher = dispatcher.clone(); - let mode = message_processing_mode; tokio::spawn(async move { handle_message( &event, @@ -559,9 +555,7 @@ pub async fn run_slack_adapter( &allowed_channels, &allowed_users, &stt_config, - &router, - dispatcher.as_ref(), - mode, + &dispatcher, ) .await; }); @@ -779,9 +773,7 @@ pub async fn run_slack_adapter( let allowed_channels = allowed_channels.clone(); let allowed_users = allowed_users.clone(); let stt_config = stt_config.clone(); - let router = router.clone(); let dispatcher = dispatcher.clone(); - let mode = message_processing_mode; tokio::spawn(async move { handle_message( &event, @@ -792,9 +784,7 @@ pub async fn run_slack_adapter( &allowed_channels, &allowed_users, &stt_config, - &router, - dispatcher.as_ref(), - mode, + &dispatcher, ) .await; }); @@ -865,9 +855,7 @@ async fn handle_message( allowed_channels: &HashSet, allowed_users: &HashSet, stt_config: &SttConfig, - router: &Arc, - dispatcher: Option<&Arc>, - message_processing_mode: crate::config::MessageProcessingMode, + dispatcher: &Arc, ) { let channel_id = match event["channel"].as_str() { Some(ch) => ch.to_string(), @@ -1079,41 +1067,25 @@ async fn handle_message( thread_channel.thread_id.as_deref() .is_some_and(|ts| cache.get(ts).is_some_and(|inst| inst.elapsed() < adapter.session_ttl)) }; - match message_processing_mode { - crate::config::MessageProcessingMode::PerMessage => { - if let Err(e) = router - .handle_message(&adapter_dyn, &thread_channel, &sender_json, &prompt, extra_blocks, &trigger_msg, other_bot_present) - .await - { - error!("Slack handle_message error: {e}"); - } - } - crate::config::MessageProcessingMode::Batched => { - if let Some(dispatcher) = dispatcher { - let thread_key = format!( - "slack:{}", - thread_channel.thread_id.as_deref().unwrap_or(&thread_channel.channel_id) - ); - let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); - let buf_msg = crate::dispatch::BufferedMessage { - sender_json, - prompt, - extra_blocks, - trigger_msg, - arrived_at: std::time::Instant::now(), - estimated_tokens, - other_bot_present, - }; - if let Err(e) = dispatcher - .submit(thread_key, thread_channel, adapter_dyn, buf_msg) - .await - { - error!("Slack dispatcher submit error: {e}"); - } - } else { - error!("Slack batched mode enabled but no dispatcher configured"); - } - } + let thread_key = format!( + "slack:{}", + thread_channel.thread_id.as_deref().unwrap_or(&thread_channel.channel_id) + ); + let estimated_tokens = 
crate::dispatch::estimate_tokens(&prompt, &extra_blocks); + let buf_msg = crate::dispatch::BufferedMessage { + sender_json, + prompt, + extra_blocks, + trigger_msg, + arrived_at: std::time::Instant::now(), + estimated_tokens, + other_bot_present, + }; + if let Err(e) = dispatcher + .submit(thread_key, thread_channel, adapter_dyn, buf_msg) + .await + { + error!("Slack dispatcher submit error: {e}"); } } From 66fcf3f4f8ff9ec4b97a3d2afa468b37f75ecbb8 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Sun, 3 May 2026 12:02:10 +0000 Subject: [PATCH 05/32] chore(dispatch): address PR #686 NITs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract duplicated days_to_ymd / ISO 8601 conversion from slack.rs + gateway.rs into new src/timestamp.rs (with unit tests). - Add sender_name to BufferedMessage per ADR §2.3 — denormalised from sender_json so dispatch_batch tracing doesn't pay a JSON parse. - impl std::error::Error for DispatchError so it composes with anyhow. --- src/discord.rs | 2 ++ src/dispatch.rs | 8 +++++ src/gateway.rs | 29 ++-------------- src/main.rs | 1 + src/slack.rs | 42 ++--------------------- src/timestamp.rs | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 104 insertions(+), 66 deletions(-) create mode 100644 src/timestamp.rs diff --git a/src/discord.rs b/src/discord.rs index 358e8456..ac5574a6 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -641,12 +641,14 @@ impl EventHandler for Handler { let dispatcher = self.dispatcher.clone(); tokio::spawn(async move { + let sender_name = sender.sender_name.clone(); let sender_json = serde_json::to_string(&sender).unwrap(); let thread_key = format!("discord:{}", thread_channel.channel_id); let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); let buf_msg = crate::dispatch::BufferedMessage { sender_json, + sender_name, prompt, extra_blocks, trigger_msg, diff --git a/src/dispatch.rs b/src/dispatch.rs index 18ff5da4..17000e67 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -29,6 +29,10 @@ use crate::reactions::StatusReactionController; pub struct BufferedMessage { /// Serialised SenderContext JSON (already built by the platform adapter). pub sender_json: String, + /// Author display name — denormalised from `sender_json` so observability + /// fields (per-event tracing in `dispatch_batch`) don't pay a JSON parse. + /// Per ADR §2.3 each arrival event carries its sender name. + pub sender_name: String, /// User-visible prompt text (verbatim, never transformed). pub prompt: String, /// Attachment blocks (images, STT transcripts) in arrival order. @@ -59,6 +63,8 @@ impl std::fmt::Display for DispatchError { } } +impl std::error::Error for DispatchError {} + // --------------------------------------------------------------------------- // Internal types // --------------------------------------------------------------------------- @@ -340,6 +346,7 @@ async fn dispatch_batch( .iter() .map(|m| m.arrived_at.elapsed().as_millis()) .collect(); + let senders: Vec<&str> = batch.iter().map(|m| m.sender_name.as_str()).collect(); // Pack all arrival events into one Vec (§3.3). 
let mut content_blocks: Vec = Vec::new(); @@ -421,6 +428,7 @@ async fn dispatch_batch( agent_dispatch_ms = agent_dispatch_ms, tokens_per_event = ?tokens_per_event, wait_ms = ?wait_ms, + senders = ?senders, "batch dispatched", ); } diff --git a/src/gateway.rs b/src/gateway.rs index c2c5f41f..f7b4d27e 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -489,7 +489,7 @@ pub async fn run_gateway_adapter( is_bot: event.sender.is_bot, // Gateway: use event timestamp if available, else broker receive time timestamp: if event.timestamp.is_empty() { - chrono_now_iso8601() + crate::timestamp::now_iso8601() } else { event.timestamp.clone() }, @@ -504,6 +504,7 @@ pub async fn run_gateway_adapter( let adapter = adapter.clone(); let prompt = event.content.text.clone(); + let sender_name = event.sender.name.clone(); let dispatcher = dispatcher.clone(); // Slash command interception for gateway platforms @@ -557,6 +558,7 @@ pub async fn run_gateway_adapter( crate::dispatch::estimate_tokens(&prompt, &[]); let buf_msg = crate::dispatch::BufferedMessage { sender_json, + sender_name, prompt, extra_blocks: vec![], trigger_msg, @@ -608,28 +610,3 @@ pub async fn run_gateway_adapter( } // outer reconnect loop } -/// Best-effort ISO 8601 UTC timestamp for the current moment (no external crate). -fn chrono_now_iso8601() -> String { - use std::time::{SystemTime, UNIX_EPOCH}; - let secs = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - // Reuse the same days_to_ymd logic inline - let days = secs / 86400; - let time_secs = secs % 86400; - let h = time_secs / 3600; - let m = (time_secs % 3600) / 60; - let s = time_secs % 60; - let z = days + 719468; - let era = z / 146097; - let doe = z % 146097; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let mo = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if mo <= 2 { y + 1 } else { y }; - format!("{y:04}-{mo:02}-{d:02}T{h:02}:{m:02}:{s:02}.000Z") -} diff --git a/src/main.rs b/src/main.rs index d5438a0c..53f81253 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,6 +14,7 @@ mod setup; mod slack; mod stt; mod gateway; +mod timestamp; use adapter::AdapterRouter; use clap::Parser; diff --git a/src/slack.rs b/src/slack.rs index b87cc940..96390ccb 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1026,7 +1026,7 @@ async fn handle_message( channel_id: channel_id.clone(), thread_id: thread_ts.clone(), is_bot: is_bot_msg, - timestamp: slack_ts_to_iso8601(&ts), + timestamp: crate::timestamp::slack_ts_to_iso8601(&ts), }; let trigger_msg = MessageRef { @@ -1074,6 +1074,7 @@ async fn handle_message( let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); let buf_msg = crate::dispatch::BufferedMessage { sender_json, + sender_name: sender.sender_name.clone(), prompt, extra_blocks, trigger_msg, @@ -1110,45 +1111,6 @@ fn slack_file_download_url(file: &serde_json::Value) -> &str { .unwrap_or("") } -/// Convert a Slack epoch-seconds timestamp (e.g. "1714204397.123456") to ISO 8601 UTC. -/// Returns a best-effort string; falls back to the raw ts on parse failure. -fn slack_ts_to_iso8601(ts: &str) -> String { - // Slack ts format: "." 
- let mut parts = ts.splitn(2, '.'); - let secs = parts.next().unwrap_or("0").parse::().unwrap_or(0); - // Take first 3 digits of fractional part as milliseconds - let frac = parts.next().unwrap_or("000"); - let ms: u64 = frac.chars().take(3).collect::().parse().unwrap_or(0); - - // Manual ISO 8601 from unix timestamp (no external crate needed) - // Days since epoch → year/month/day - let total_secs = secs; - let days = total_secs / 86400; - let time_secs = total_secs % 86400; - let h = time_secs / 3600; - let m = (time_secs % 3600) / 60; - let s = time_secs % 60; - - // Gregorian calendar calculation - let (year, month, day) = days_to_ymd(days); - format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{ms:03}Z") -} - -fn days_to_ymd(days: u64) -> (u64, u64, u64) { - // Algorithm from https://howardhinnant.github.io/date_algorithms.html - let z = days + 719468; - let era = z / 146097; - let doe = z % 146097; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y, m, d) -} - /// Strip MIME parameters like `; charset=utf-8` so type-detection helpers see /// the bare media type. Slack occasionally sends mimetypes like /// `text/plain; charset=utf-8`; `media::is_text_file` expects the bare form. diff --git a/src/timestamp.rs b/src/timestamp.rs new file mode 100644 index 00000000..00872d19 --- /dev/null +++ b/src/timestamp.rs @@ -0,0 +1,88 @@ +//! ISO 8601 UTC timestamp helpers — no external crate dependency. +//! +//! Centralizes the Gregorian date math used by Slack (`.` ts strings) +//! and Gateway (`SystemTime::now()`) so both adapters share one implementation. + +use std::time::{SystemTime, UNIX_EPOCH}; + +/// Convert days since the Unix epoch (1970-01-01) to a Gregorian (year, month, day). +/// Algorithm from . +fn days_to_ymd(days: u64) -> (u64, u64, u64) { + let z = days + 719468; + let era = z / 146097; + let doe = z % 146097; + let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; + let y = yoe + era * 400; + let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); + let mp = (5 * doy + 2) / 153; + let d = doy - (153 * mp + 2) / 5 + 1; + let m = if mp < 10 { mp + 3 } else { mp - 9 }; + let y = if m <= 2 { y + 1 } else { y }; + (y, m, d) +} + +/// Format a Unix timestamp (seconds + millis) as ISO 8601 UTC with millisecond precision. +fn unix_to_iso8601(secs: u64, ms: u64) -> String { + let days = secs / 86400; + let time_secs = secs % 86400; + let h = time_secs / 3600; + let m = (time_secs % 3600) / 60; + let s = time_secs % 60; + let (year, month, day) = days_to_ymd(days); + format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}.{ms:03}Z") +} + +/// Convert a Slack `ts` string (".") to ISO 8601 UTC. +/// Best-effort; falls back to epoch on parse failure. +pub fn slack_ts_to_iso8601(ts: &str) -> String { + let mut parts = ts.splitn(2, '.'); + let secs = parts.next().unwrap_or("0").parse::().unwrap_or(0); + let frac = parts.next().unwrap_or("000"); + let ms: u64 = frac.chars().take(3).collect::().parse().unwrap_or(0); + unix_to_iso8601(secs, ms) +} + +/// Current wall-clock instant as ISO 8601 UTC (millisecond precision, always `.000Z`). 
+pub fn now_iso8601() -> String { + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + unix_to_iso8601(secs, 0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn slack_ts_epoch_zero() { + assert_eq!(slack_ts_to_iso8601("0.000000"), "1970-01-01T00:00:00.000Z"); + } + + #[test] + fn slack_ts_keeps_milliseconds() { + // 1714204397 = 2024-04-27T07:53:17 UTC; .123456 → .123 ms + assert_eq!(slack_ts_to_iso8601("1714204397.123456"), "2024-04-27T07:53:17.123Z"); + } + + #[test] + fn slack_ts_missing_fraction_uses_zero() { + assert_eq!(slack_ts_to_iso8601("1714204397"), "2024-04-27T07:53:17.000Z"); + } + + #[test] + fn slack_ts_unparseable_falls_back_to_epoch() { + assert_eq!(slack_ts_to_iso8601("not-a-ts"), "1970-01-01T00:00:00.000Z"); + } + + #[test] + fn now_iso8601_has_expected_shape() { + let s = now_iso8601(); + // YYYY-MM-DDTHH:MM:SS.000Z = 24 chars + assert_eq!(s.len(), 24); + assert!(s.ends_with(".000Z")); + assert_eq!(&s[4..5], "-"); + assert_eq!(&s[10..11], "T"); + } +} From 81850c1f51510c65205f8fe0fd8a2ddd84e31452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B6=85=E6=B8=A1=E6=B3=95=E5=B8=AB?= Date: Sun, 3 May 2026 22:14:36 +0000 Subject: [PATCH 06/32] fix(dispatch): idle eviction, config validation, avoid clone, timestamp precision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 5-min idle timeout to consumer_loop to prevent per-thread handle/task leak (unbounded growth from one-shot thread keys like Slack non-thread msgs) - Validate max_buffered_messages > 0 at config load time (prevents panic from tokio::sync::mpsc::channel(0)) - Use into_iter() in dispatch_batch to avoid deep-copying extra_blocks (may contain base64 image data) - Add TODO comment for gateway multibot detection - Use real milliseconds in now_iso8601() via dur.subsec_millis() Co-authored-by: 超渡法師 --- src/config.rs | 12 ++++++++++++ src/dispatch.rs | 47 ++++++++++++++++++++++++++++++++++------------- src/gateway.rs | 1 + src/timestamp.rs | 14 +++++++------- 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/src/config.rs b/src/config.rs index 4978b1ad..b6214ae3 100644 --- a/src/config.rs +++ b/src/config.rs @@ -504,6 +504,18 @@ fn parse_config(raw: &str, source: &str) -> anyhow::Result { let expanded = expand_env_vars(raw); let config: Config = toml::from_str(&expanded) .map_err(|e| anyhow::anyhow!("failed to parse config from {source}: {e}"))?; + + // Validate max_buffered_messages > 0 (tokio::sync::mpsc::channel panics on 0). 
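
Aside: a hedged test sketch for this validation, mirroring the style of the existing config tests (this test is not part of the patch itself):

#[test]
fn zero_max_buffered_messages_is_rejected() {
    let toml = r#"
[discord]
bot_token = "t"
max_buffered_messages = 0

[agent]
command = "echo"
"#;
    // tokio::sync::mpsc::channel(0) would panic at runtime; the anyhow::ensure!
    // checks in parse_config turn this into a load-time configuration error instead.
    assert!(parse_config(toml, "test").is_err());
}
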
+ if let Some(ref d) = config.discord { + anyhow::ensure!(d.max_buffered_messages > 0, "discord.max_buffered_messages must be > 0"); + } + if let Some(ref s) = config.slack { + anyhow::ensure!(s.max_buffered_messages > 0, "slack.max_buffered_messages must be > 0"); + } + if let Some(ref g) = config.gateway { + anyhow::ensure!(g.max_buffered_messages > 0, "gateway.max_buffered_messages must be > 0"); + } + Ok(config) } diff --git a/src/dispatch.rs b/src/dispatch.rs index 17000e67..64151521 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -11,10 +11,10 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -use std::time::Instant; +use std::time::{Duration, Instant}; use anyhow::Result; -use tracing::{error, info, info_span, warn}; +use tracing::{debug, error, info, info_span, warn}; use crate::adapter::{AdapterRouter, ChannelRef, ChatAdapter, MessageRef}; use crate::acp::ContentBlock; @@ -255,6 +255,12 @@ impl Dispatcher { // consumer_loop // --------------------------------------------------------------------------- +/// Idle timeout for per-thread consumer tasks. When no message arrives within +/// this window the consumer exits, allowing `per_thread` map cleanup on the +/// next `submit` (via `SendError` → `try_evict_locked`). Prevents unbounded +/// task/memory growth from one-shot thread keys (e.g. Slack non-thread messages). +const CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(300); // 5 min + #[allow(clippy::too_many_arguments)] async fn consumer_loop( thread_key: String, @@ -271,13 +277,26 @@ async fn consumer_loop( loop { // I1: block until at least one message arrives (zero latency for first message). + // Idle timeout: if no message arrives within CONSUMER_IDLE_TIMEOUT the + // consumer exits, freeing the task and mpsc. The next `submit` for this + // thread_key will observe `SendError`, evict the stale entry, and lazily + // spawn a fresh consumer (§2.5 generation check prevents mis-eviction). let first = match pending.take() { Some(msg) => msg, - None => match rx.recv().await { - Some(msg) => msg, - // All senders dropped → either shutdown() cleared the map, or - // cancel_buffered() removed our entry. Exit the loop. - None => break, + None => match tokio::time::timeout(CONSUMER_IDLE_TIMEOUT, rx.recv()).await { + Ok(Some(msg)) => msg, + Ok(None) => { + // All senders dropped → shutdown() or cancel_buffered(). + break; + } + Err(_elapsed) => { + debug!( + thread_key = %thread_key, + channel = %thread_channel.channel_id, + "consumer idle timeout, exiting" + ); + break; + } }, }; @@ -340,19 +359,23 @@ async fn dispatch_batch( ) .await; - // Collect per-event observability data. + // Collect per-event observability data (before consuming the batch). let tokens_per_event: Vec = batch.iter().map(|m| m.estimated_tokens).collect(); let wait_ms: Vec = batch .iter() .map(|m| m.arrived_at.elapsed().as_millis()) .collect(); - let senders: Vec<&str> = batch.iter().map(|m| m.sender_name.as_str()).collect(); + let senders: Vec = batch.iter().map(|m| m.sender_name.clone()).collect(); + + // Anchor reactions on the last message in the batch (before consuming). + let trigger_msg = batch.last().unwrap().trigger_msg.clone(); // Pack all arrival events into one Vec (§3.3). + // Uses into_iter() to avoid deep-copying extra_blocks (may contain base64 image data). 
let mut content_blocks: Vec = Vec::new(); - for msg in &batch { + for msg in batch { let mut event_blocks = - AdapterRouter::pack_arrival_event(&msg.sender_json, &msg.prompt, msg.extra_blocks.clone()); + AdapterRouter::pack_arrival_event(&msg.sender_json, &msg.prompt, msg.extra_blocks); content_blocks.append(&mut event_blocks); } let packed_block_count = content_blocks.len(); @@ -367,8 +390,6 @@ async fn dispatch_batch( return; } - // Anchor reactions on the last message in the batch. - let trigger_msg = batch.last().unwrap().trigger_msg.clone(); let reactions_config = router.reactions_config().clone(); let reactions = Arc::new(StatusReactionController::new( reactions_config.enabled, diff --git a/src/gateway.rs b/src/gateway.rs index f7b4d27e..de426e64 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -564,6 +564,7 @@ pub async fn run_gateway_adapter( trigger_msg, arrived_at: std::time::Instant::now(), estimated_tokens, + // TODO: implement gateway multibot detection other_bot_present: false, }; if let Err(e) = dispatcher diff --git a/src/timestamp.rs b/src/timestamp.rs index 00872d19..485a7864 100644 --- a/src/timestamp.rs +++ b/src/timestamp.rs @@ -42,13 +42,12 @@ pub fn slack_ts_to_iso8601(ts: &str) -> String { unix_to_iso8601(secs, ms) } -/// Current wall-clock instant as ISO 8601 UTC (millisecond precision, always `.000Z`). +/// Current wall-clock instant as ISO 8601 UTC with millisecond precision. pub fn now_iso8601() -> String { - let secs = SystemTime::now() + let dur = SystemTime::now() .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - unix_to_iso8601(secs, 0) + .unwrap_or_default(); + unix_to_iso8601(dur.as_secs(), (dur.subsec_millis()) as u64) } #[cfg(test)] @@ -79,10 +78,11 @@ mod tests { #[test] fn now_iso8601_has_expected_shape() { let s = now_iso8601(); - // YYYY-MM-DDTHH:MM:SS.000Z = 24 chars + // YYYY-MM-DDTHH:MM:SS.mmmZ = 24 chars assert_eq!(s.len(), 24); - assert!(s.ends_with(".000Z")); + assert!(s.ends_with('Z')); assert_eq!(&s[4..5], "-"); assert_eq!(&s[10..11], "T"); + assert_eq!(&s[19..20], "."); } } From afd6fff2c5d4c5695b9e1161b8310b09371910fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B6=85=E6=B8=A1=E6=B3=95=E5=B8=AB?= Date: Sun, 3 May 2026 22:17:19 +0000 Subject: [PATCH 07/32] fix(dispatch): proactive stale-entry cleanup + transparent retry on idle exit - submit() now checks consumer.is_finished() before using an existing handle, removing stale entries proactively (fixes map leak for one-shot thread keys that never get a second submit) - On SendError, transparently evict + rebuild + retry once instead of surfacing an error to the user (fixes first-message-after-idle being treated as ConsumerDead) - Only report ConsumerDead if the retry also fails (truly unexpected) --- src/dispatch.rs | 76 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index 64151521..10a1f918 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -149,6 +149,17 @@ impl Dispatcher { let (tx, my_generation) = { let mut map = self.per_thread.lock().unwrap(); + + // Proactive stale-entry cleanup: if the consumer has exited (idle + // timeout or unexpected), remove the entry so `or_insert_with` + // creates a fresh one. Prevents map leak from one-shot thread keys + // and avoids the first-message-after-idle being treated as an error. 
+ if let Some(handle) = map.get(&thread_key) { + if handle.consumer.is_finished() { + map.remove(&thread_key); + } + } + let entry = map.entry(thread_key.clone()).or_insert_with(|| { let (tx, rx) = tokio::sync::mpsc::channel(cap); let consumer = tokio::spawn(consumer_loop( @@ -172,25 +183,62 @@ impl Dispatcher { }; if let Err(e) = tx.send(msg).await { - // Consumer has exited — race-safe eviction under lock (§2.5). + // Consumer has exited between our check and the send — race-safe + // eviction under lock (§2.5), then transparent retry once. { let mut map = self.per_thread.lock().unwrap(); Self::try_evict_locked(&mut map, &thread_key, my_generation); } let failed_msg = e.0; - let _ = adapter - .add_reaction( - &failed_msg.trigger_msg, - &self.router.reactions_config().emojis.error, - ) - .await; - let _ = adapter - .send_message( - &thread_channel, - &format!("⚠️ {}", format_user_error("dispatch consumer exited unexpectedly")), - ) - .await; - return Err(DispatchError::ConsumerDead); + + // Retry: spawn a fresh consumer and re-send. If this also fails, + // surface the error to the user. + let retry_g = self.next_generation.fetch_add(1, Ordering::Relaxed); + let (retry_tx, retry_gen) = { + let mut map = self.per_thread.lock().unwrap(); + let entry = map.entry(thread_key.clone()).or_insert_with(|| { + let (tx, rx) = tokio::sync::mpsc::channel(cap); + let consumer = tokio::spawn(consumer_loop( + thread_key.clone(), + thread_channel.clone(), + rx, + Arc::clone(&router), + Arc::clone(&adapter), + cap, + max_tokens, + )); + ThreadHandle { + tx, + consumer, + generation: retry_g, + channel_id: thread_channel.channel_id.clone(), + adapter_kind: adapter.platform().to_string(), + } + }); + (entry.tx.clone(), entry.generation) + }; + + if let Err(e2) = retry_tx.send(failed_msg).await { + // Retry also failed — truly unexpected. Surface error. + { + let mut map = self.per_thread.lock().unwrap(); + Self::try_evict_locked(&mut map, &thread_key, retry_gen); + } + let failed_msg = e2.0; + let _ = adapter + .add_reaction( + &failed_msg.trigger_msg, + &self.router.reactions_config().emojis.error, + ) + .await; + let _ = adapter + .send_message( + &thread_channel, + &format!("⚠️ {}", format_user_error("dispatch consumer exited unexpectedly")), + ) + .await; + return Err(DispatchError::ConsumerDead); + } } Ok(()) } From 472d6d98043298dfa76691a860b7b3de277be935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B6=85=E6=B8=A1=E6=B3=95=E5=B8=AB?= Date: Sun, 3 May 2026 22:19:53 +0000 Subject: [PATCH 08/32] fix(dispatch): periodic sweep of stale per-thread entries - Add Dispatcher::sweep_stale() that retains only entries whose consumer task is still running (map.retain + is_finished check) - Wire into main.rs cleanup task (60s interval, alongside pool.cleanup_idle) - Prevents unbounded map growth from one-shot thread keys (e.g. Slack non-thread messages) that never receive a second submit() - dispatchers Vec wrapped in Arc> so cleanup task can access it --- src/dispatch.rs | 11 +++++++++++ src/main.rs | 18 ++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index 10a1f918..366324c2 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -280,6 +280,17 @@ impl Dispatcher { false } + /// Remove map entries whose consumer task has finished (idle timeout or + /// unexpected exit). Called periodically from the cleanup task in main.rs + /// to prevent unbounded map growth from one-shot thread keys that never + /// receive a second `submit()`. 
Returns the number of entries swept. + pub fn sweep_stale(&self) -> usize { + let mut map = self.per_thread.lock().unwrap(); + let before = map.len(); + map.retain(|_, handle| !handle.consumer.is_finished()); + before - map.len() + } + /// Log buffered-message counts and drop all handles (called on SIGTERM). pub fn shutdown(&self) { let mut map = self.per_thread.lock().unwrap(); diff --git a/src/main.rs b/src/main.rs index 53f81253..2e26f6f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,7 @@ use serenity::gateway::GatewayError; use serenity::prelude::*; use std::collections::HashSet; use std::path::PathBuf; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use tracing::{error, info, warn}; /// Wait for SIGINT (ctrl_c) or, on unix, SIGTERM. SIGTERM is what Kubernetes @@ -145,14 +145,20 @@ async fn main() -> anyhow::Result<()> { let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false); // Dispatcher handles tracked here so SIGTERM cleanup can call shutdown() on each (ADR §6.8). - let mut dispatchers: Vec> = Vec::new(); + // Also shared with the cleanup task for periodic stale-entry sweeping. + let dispatchers: Arc>>> = Arc::new(Mutex::new(Vec::new())); // Spawn cleanup task let cleanup_pool = pool.clone(); + let cleanup_dispatchers = dispatchers.clone(); let cleanup_handle = tokio::spawn(async move { loop { tokio::time::sleep(std::time::Duration::from_secs(60)).await; cleanup_pool.cleanup_idle(ttl_secs).await; + // Sweep stale per-thread dispatcher entries (idle-exited consumers). + for d in cleanup_dispatchers.lock().unwrap().iter() { + d.sweep_stale(); + } } }); @@ -204,7 +210,7 @@ async fn main() -> anyhow::Result<()> { slack_cap, slack_cfg.max_batch_tokens, )); - dispatchers.push(slack_dispatcher.clone()); + dispatchers.lock().unwrap().push(slack_dispatcher.clone()); Some(tokio::spawn(async move { if let Err(e) = slack::run_slack_adapter( adapter, @@ -244,7 +250,7 @@ async fn main() -> anyhow::Result<()> { gw_cap, gw_cfg.max_batch_tokens, )); - dispatchers.push(gw_dispatcher.clone()); + dispatchers.lock().unwrap().push(gw_dispatcher.clone()); let params = gateway::GatewayParams { url: gw_cfg.url, platform: gw_cfg.platform, @@ -337,7 +343,7 @@ async fn main() -> anyhow::Result<()> { discord_cap, discord_cfg.max_batch_tokens, )); - dispatchers.push(discord_dispatcher.clone()); + dispatchers.lock().unwrap().push(discord_dispatcher.clone()); let handler = discord::Handler { router, @@ -418,7 +424,7 @@ async fn main() -> anyhow::Result<()> { let _ = tokio::time::timeout(std::time::Duration::from_secs(35), handle).await; } // Drain per-thread dispatchers and log buffered_lost counts before pool shutdown (ADR §6.8). 
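
Aside: a standalone illustration (not the project's test) of the retain-plus-is_finished pattern that sweep_stale relies on. JoinHandle::is_finished is non-blocking, so the sweep never awaits while holding the per_thread lock:

use std::collections::HashMap;

#[tokio::main]
async fn main() {
    let mut tasks: HashMap<&str, tokio::task::JoinHandle<()>> = HashMap::new();
    tasks.insert("done", tokio::spawn(async {})); // finishes almost immediately
    tasks.insert("idle", tokio::spawn(async {
        tokio::time::sleep(std::time::Duration::from_secs(60)).await; // still running
    }));
    tokio::time::sleep(std::time::Duration::from_millis(10)).await; // let "done" finish
    let before = tasks.len();
    tasks.retain(|_, h| !h.is_finished());
    println!("swept {} of {before} entries", before - tasks.len()); // expected: swept 1 of 2
}
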
- for d in &dispatchers { + for d in dispatchers.lock().unwrap().iter() { d.shutdown(); } let shutdown_pool = pool; From e0dd269d6538ff5d87da1ac3616493bcb7b2af4d Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 04:35:23 +0000 Subject: [PATCH 09/32] feat(dispatch): add per-lane batching mode (default for "batched" alias) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends MessageProcessingMode from {PerMessage, Batched} to three values: - PerMessage: each message → one ACP turn (unchanged default behaviour) - PerThread: thread-wide buffer, all senders share one batch (old "Batched") - PerLane: per (thread, sender) buffer, each sender gets its own ACP turn The legacy alias "batched" now resolves to PerLane — the recommended default for batching, since per-lane eliminates the silent-drop risk where a single mixed-sender ACP turn produces one reply that may forget to address some senders. Existing configs continue to load without change but now run under per-lane semantics. Implementation: - Adds BatchGrouping enum to dispatch.rs and `Dispatcher::key()` helper that builds the per-thread map key from (platform, thread_id, sender_id). PerThread mode ignores sender_id; PerLane includes it. - main.rs translates MessageProcessingMode to (cap, BatchGrouping) when constructing each platform's Dispatcher. - Discord/Slack/Gateway adapters use `dispatcher.key(...)` instead of hand-rolled format!() at submit and slash-command sites. - Session pool keys remain per-thread (unchanged) — the ACP session is shared across lanes by design; turns serialise through the shared session. - /cancel-all and /reset use the invoker's lane key (B1: cancel only own lane) but still cancel/reset the shared session (B4-a: keep escape hatch from a runaway in-flight turn). Tests: - dispatch::tests::key_per_thread_ignores_sender / key_per_lane_includes_sender - config::tests::message_processing_mode_{parses_per_message,parses_per_thread, parses_per_lane,batched_alias_is_per_lane,default_is_per_message, unknown_value_errors} - 224 tests passing. Co-Authored-By: Claude Opus 4.7 --- src/config.rs | 114 +++++++++++++++++++++++++++++++++++++++++++++--- src/discord.rs | 30 ++++++++++--- src/dispatch.rs | 74 +++++++++++++++++++++++++++++++ src/gateway.rs | 14 +++--- src/main.rs | 38 +++++++++++----- src/slack.rs | 9 ++-- 6 files changed, 246 insertions(+), 33 deletions(-) diff --git a/src/config.rs b/src/config.rs index b6214ae3..4511d494 100644 --- a/src/config.rs +++ b/src/config.rs @@ -7,22 +7,32 @@ use std::path::Path; /// Controls how incoming messages are dispatched to ACP turns. /// /// - `PerMessage` (default): each message becomes its own ACP turn (v0.8.2-beta.1 behaviour). -/// - `Batched`: messages that arrive while a turn is in flight are buffered and merged -/// into one ACP turn at the next turn boundary (see ADR: turn-boundary-batching-adr.md). +/// - `PerThread`: one buffer per thread; all senders in a thread share a single batch and +/// produce one ACP turn per turn boundary. +/// - `PerLane`: one buffer per (thread, sender); each sender batches independently and gets +/// its own ACP turn — no silent-drop risk when multiple senders address the same thread. +/// +/// The legacy alias `"batched"` resolves to `PerLane` (the recommended batching default). 
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub enum MessageProcessingMode { #[default] PerMessage, - Batched, + PerThread, + PerLane, } impl<'de> Deserialize<'de> for MessageProcessingMode { fn deserialize>(deserializer: D) -> Result { let s = String::deserialize(deserializer)?; match s.to_lowercase().replace('-', "_").as_str() { - "per_message" | "per-message" => Ok(Self::PerMessage), - "batched" => Ok(Self::Batched), - other => Err(serde::de::Error::unknown_variant(other, &["per-message", "batched"])), + "per_message" => Ok(Self::PerMessage), + "per_thread" => Ok(Self::PerThread), + // "batched" is a legacy alias for PerLane — the new default batching mode. + "per_lane" | "batched" => Ok(Self::PerLane), + other => Err(serde::de::Error::unknown_variant( + other, + &["per-message", "per-thread", "per-lane"], + )), } } } @@ -648,6 +658,98 @@ command = "echo" assert_eq!(ToolDisplay::default(), ToolDisplay::Full); } + #[test] + fn message_processing_mode_parses_per_message() { + let toml = r#" +[discord] +bot_token = "t" +message_processing_mode = "per-message" + +[agent] +command = "echo" +"#; + let cfg = parse_config(toml, "test").unwrap(); + assert_eq!( + cfg.discord.unwrap().message_processing_mode, + MessageProcessingMode::PerMessage + ); + } + + #[test] + fn message_processing_mode_parses_per_thread() { + let toml = r#" +[discord] +bot_token = "t" +message_processing_mode = "per-thread" + +[agent] +command = "echo" +"#; + let cfg = parse_config(toml, "test").unwrap(); + assert_eq!( + cfg.discord.unwrap().message_processing_mode, + MessageProcessingMode::PerThread + ); + } + + #[test] + fn message_processing_mode_parses_per_lane() { + let toml = r#" +[discord] +bot_token = "t" +message_processing_mode = "per-lane" + +[agent] +command = "echo" +"#; + let cfg = parse_config(toml, "test").unwrap(); + assert_eq!( + cfg.discord.unwrap().message_processing_mode, + MessageProcessingMode::PerLane + ); + } + + // Legacy alias: "batched" used to mean per-thread; it now resolves to PerLane + // (the recommended batching default). Existing configs continue to load. 
+ #[test] + fn message_processing_mode_batched_alias_is_per_lane() { + let toml = r#" +[discord] +bot_token = "t" +message_processing_mode = "batched" + +[agent] +command = "echo" +"#; + let cfg = parse_config(toml, "test").unwrap(); + assert_eq!( + cfg.discord.unwrap().message_processing_mode, + MessageProcessingMode::PerLane + ); + } + + #[test] + fn message_processing_mode_default_is_per_message() { + let cfg = parse_config(MINIMAL_TOML, "test").unwrap(); + assert_eq!( + cfg.discord.unwrap().message_processing_mode, + MessageProcessingMode::PerMessage + ); + } + + #[test] + fn message_processing_mode_unknown_value_errors() { + let toml = r#" +[discord] +bot_token = "t" +message_processing_mode = "bogus" + +[agent] +command = "echo" +"#; + assert!(parse_config(toml, "test").is_err()); + } + #[test] fn parse_gateway_config_explicit_allow_all_overrides_list() { let toml = r#" diff --git a/src/discord.rs b/src/discord.rs index ac5574a6..9f6acdcd 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -641,9 +641,11 @@ impl EventHandler for Handler { let dispatcher = self.dispatcher.clone(); tokio::spawn(async move { + let sender_id = sender.sender_id.clone(); let sender_name = sender.sender_name.clone(); let sender_json = serde_json::to_string(&sender).unwrap(); - let thread_key = format!("discord:{}", thread_channel.channel_id); + let thread_key = + dispatcher.key("discord", &thread_channel.channel_id, &sender_id); let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); let buf_msg = crate::dispatch::BufferedMessage { @@ -884,11 +886,20 @@ impl Handler { ctx: &Context, cmd: &serenity::model::application::CommandInteraction, ) { - let thread_key = format!("discord:{}", cmd.channel_id.get()); + // Session pool key is always per-thread (B4-a: cancel the in-flight ACP turn even + // in PerLane mode — it's the only escape hatch from a runaway turn, and the + // shared session means there's only one in-flight turn anyway). + let session_key = format!("discord:{}", cmd.channel_id.get()); + // Dispatcher key follows the configured grouping. In PerLane mode this scopes + // the buffer drop to the invoker's own lane (B1=X). + let invoker_id = cmd.user.id.to_string(); + let dispatcher_key = self + .dispatcher + .key("discord", &cmd.channel_id.get().to_string(), &invoker_id); - let dropped = self.dispatcher.cancel_buffered(&thread_key); + let dropped = self.dispatcher.cancel_buffered(&dispatcher_key); - let cancel_result = self.router.pool().cancel_session(&thread_key).await; + let cancel_result = self.router.pool().cancel_session(&session_key).await; let msg = match (cancel_result, dropped) { (Ok(()), 0) => "🛑 Cancel signal sent.".to_string(), @@ -910,12 +921,17 @@ impl Handler { ctx: &Context, cmd: &serenity::model::application::CommandInteraction, ) { - let thread_key = format!("discord:{}", cmd.channel_id.get()); + // Session pool key is always per-thread (see /cancel-all for rationale). + let session_key = format!("discord:{}", cmd.channel_id.get()); + let invoker_id = cmd.user.id.to_string(); + let dispatcher_key = self + .dispatcher + .key("discord", &cmd.channel_id.get().to_string(), &invoker_id); // /reset is a superset of /cancel-all: drop buffered work, then tear down the session. 
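
Aside: a worked configuration example, written in the same test style as the config tests added above. It shows per-lane batching enabled for Discord with explicit buffer and token caps; the key names are the fields referenced elsewhere in this series, and the surrounding TOML skeleton matches the existing tests:

#[test]
fn message_processing_mode_per_lane_with_caps() {
    let toml = r#"
[discord]
bot_token = "t"
message_processing_mode = "per-lane"
max_buffered_messages = 20
max_batch_tokens = 16000

[agent]
command = "echo"
"#;
    let cfg = parse_config(toml, "test").unwrap();
    let d = cfg.discord.unwrap();
    assert_eq!(d.message_processing_mode, MessageProcessingMode::PerLane);
    assert_eq!(d.max_buffered_messages, 20);
    assert_eq!(d.max_batch_tokens, 16_000);
}
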
- let dropped = self.dispatcher.cancel_buffered(&thread_key); + let dropped = self.dispatcher.cancel_buffered(&dispatcher_key); - let result = self.router.pool().reset_session(&thread_key).await; + let result = self.router.pool().reset_session(&session_key).await; let msg = match result { Ok(()) if dropped > 0 => { diff --git a/src/dispatch.rs b/src/dispatch.rs index 366324c2..02669cc1 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -48,6 +48,22 @@ pub struct BufferedMessage { pub other_bot_present: bool, } +/// How `thread_key` is built for the dispatcher's per-thread map. +/// +/// - `PerThread`: one mpsc per thread → all senders in a thread share one batch → one +/// ACP turn per batch (cheaper, but risks silent drop when the agent's single reply +/// forgets to address some senders). +/// - `PerLane`: one mpsc per (thread, sender) → each sender batches independently and +/// gets a dedicated ACP turn. Sessions are still shared per-thread; turns serialise +/// through the shared session. +/// +/// Derived from `config::MessageProcessingMode` in `main.rs`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BatchGrouping { + PerThread, + PerLane, +} + /// Error returned by `Dispatcher::submit`. #[derive(Debug)] pub enum DispatchError { @@ -104,6 +120,7 @@ pub struct Dispatcher { router: Arc, max_buffered_messages: usize, max_batch_tokens: usize, + grouping: BatchGrouping, } impl Dispatcher { @@ -111,6 +128,7 @@ impl Dispatcher { router: Arc, max_buffered_messages: usize, max_batch_tokens: usize, + grouping: BatchGrouping, ) -> Self { Self { per_thread: Mutex::new(HashMap::new()), @@ -118,6 +136,22 @@ impl Dispatcher { router, max_buffered_messages, max_batch_tokens, + grouping, + } + } + + /// Build the dispatcher key for a (platform, thread, sender) tuple. + /// + /// In `PerThread` mode the sender is ignored; in `PerLane` mode the sender is appended + /// so each (thread, sender) pair gets its own mpsc and consumer. + /// + /// Note: this is the *dispatcher* key, not the *session pool* key. Session pool keys + /// are always `:` regardless of grouping (the ACP session is + /// shared per-thread by design). + pub fn key(&self, platform: &str, thread_id: &str, sender_id: &str) -> String { + match self.grouping { + BatchGrouping::PerThread => format!("{platform}:{thread_id}"), + BatchGrouping::PerLane => format!("{platform}:{thread_id}:{sender_id}"), } } @@ -808,4 +842,44 @@ mod tests { let mut map: HashMap = HashMap::new(); assert!(!Dispatcher::try_evict_locked(&mut map, "missing", 0)); } + + // BatchGrouping → thread_key shape. + fn make_dispatcher(grouping: BatchGrouping) -> Dispatcher { + // The router is wrapped in Arc but never used by `key()` itself; we use + // a dummy AdapterRouter built via the same path main.rs would use. + // For a pure-keying test we'd ideally not need it, but the constructor demands one. + // Construct a minimal router via the public test helpers in adapter.rs if available; + // otherwise we fall back to building one with a dummy SessionPool. 
+ use crate::acp::SessionPool; + let agent_cfg = crate::config::AgentConfig { + command: "/bin/true".into(), + args: vec![], + working_dir: "/tmp".into(), + env: std::collections::HashMap::new(), + inherit_env: vec![], + }; + let pool = Arc::new(SessionPool::new(agent_cfg, 1)); + let router = Arc::new(AdapterRouter::new( + pool, + crate::config::ReactionsConfig::default(), + crate::markdown::TableMode::Off, + )); + Dispatcher::new(router, 10, 24_000, grouping) + } + + #[tokio::test] + async fn key_per_thread_ignores_sender() { + let d = make_dispatcher(BatchGrouping::PerThread); + assert_eq!(d.key("discord", "T1", "userA"), "discord:T1"); + assert_eq!(d.key("discord", "T1", "userB"), "discord:T1"); + } + + #[tokio::test] + async fn key_per_lane_includes_sender() { + let d = make_dispatcher(BatchGrouping::PerLane); + assert_eq!(d.key("discord", "T1", "userA"), "discord:T1:userA"); + assert_eq!(d.key("discord", "T1", "userB"), "discord:T1:userB"); + // Different threads remain distinct. + assert_eq!(d.key("slack", "T2", "userA"), "slack:T2:userA"); + } } diff --git a/src/gateway.rs b/src/gateway.rs index de426e64..1502adc6 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -505,6 +505,7 @@ pub async fn run_gateway_adapter( let adapter = adapter.clone(); let prompt = event.content.text.clone(); let sender_name = event.sender.name.clone(); + let sender_id = event.sender.id.clone(); let dispatcher = dispatcher.clone(); // Slash command interception for gateway platforms @@ -548,11 +549,14 @@ pub async fn run_gateway_adapter( channel.clone() }; - let thread_key = format!( - "{}:{}", - thread_channel.platform, - thread_channel.thread_id.as_deref() - .unwrap_or(&thread_channel.channel_id) + let thread_id = thread_channel + .thread_id + .as_deref() + .unwrap_or(&thread_channel.channel_id); + let thread_key = dispatcher.key( + &thread_channel.platform, + thread_id, + &sender_id, ); let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &[]); diff --git a/src/main.rs b/src/main.rs index 2e26f6f5..84e3d871 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,16 +199,22 @@ async fn main() -> anyhow::Result<()> { let max_bot_turns = slack_cfg.max_bot_turns; let slack_shutdown_rx = shutdown_rx.clone(); let adapter = shared_slack_adapter.clone().expect("shared_slack_adapter must exist when slack config is present"); - // Dispatcher is the sole serialization path for both modes. PerMessage = - // cap 1 (each message dispatches alone, FIFO), Batched = configured cap. - let slack_cap = match slack_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => 1, - config::MessageProcessingMode::Batched => slack_cfg.max_buffered_messages, + // Dispatcher is the sole serialization path for all modes. PerMessage = cap 1 + // (each message dispatches alone, FIFO). PerThread / PerLane = configured cap; + // grouping decides whether senders share a buffer or get their own lane. 
+ let (slack_cap, slack_grouping) = match slack_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => + (1, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerThread => + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerLane => + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), }; let slack_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), slack_cap, slack_cfg.max_batch_tokens, + slack_grouping, )); dispatchers.lock().unwrap().push(slack_dispatcher.clone()); Some(tokio::spawn(async move { @@ -241,14 +247,19 @@ async fn main() -> anyhow::Result<()> { let router = router.clone(); let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); - let gw_cap = match gw_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => 1, - config::MessageProcessingMode::Batched => gw_cfg.max_buffered_messages, + let (gw_cap, gw_grouping) = match gw_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => + (1, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerThread => + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerLane => + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), }; let gw_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), gw_cap, gw_cfg.max_batch_tokens, + gw_grouping, )); dispatchers.lock().unwrap().push(gw_dispatcher.clone()); let params = gateway::GatewayParams { @@ -334,14 +345,19 @@ async fn main() -> anyhow::Result<()> { "starting discord adapter" ); - let discord_cap = match discord_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => 1, - config::MessageProcessingMode::Batched => discord_cfg.max_buffered_messages, + let (discord_cap, discord_grouping) = match discord_cfg.message_processing_mode { + config::MessageProcessingMode::PerMessage => + (1, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerThread => + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), + config::MessageProcessingMode::PerLane => + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), }; let discord_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), discord_cap, discord_cfg.max_batch_tokens, + discord_grouping, )); dispatchers.lock().unwrap().push(discord_dispatcher.clone()); diff --git a/src/slack.rs b/src/slack.rs index 96390ccb..015bf978 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1067,10 +1067,11 @@ async fn handle_message( thread_channel.thread_id.as_deref() .is_some_and(|ts| cache.get(ts).is_some_and(|inst| inst.elapsed() < adapter.session_ttl)) }; - let thread_key = format!( - "slack:{}", - thread_channel.thread_id.as_deref().unwrap_or(&thread_channel.channel_id) - ); + let thread_id = thread_channel + .thread_id + .as_deref() + .unwrap_or(&thread_channel.channel_id); + let thread_key = dispatcher.key("slack", thread_id, &sender.sender_id); let estimated_tokens = crate::dispatch::estimate_tokens(&prompt, &extra_blocks); let buf_msg = crate::dispatch::BufferedMessage { sender_json, From a6fb806a8d420ea15a3c9f60cd5a71a4899c8406 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 05:50:44 +0000 Subject: [PATCH 10/32] fix(dispatch): /reset and /cancel-all clear all lanes in thread MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the per-key Dispatcher::cancel_buffered with cancel_buffered_thread, which prefix-matches every per-thread handle for a (platform, thread_id) pair and aborts each consumer. Both PerThread keys (`platform:thread`) and PerLane keys (`platform:thread:sender`) are dropped, with care taken to avoid the substring trap (T1 must not match T10). Behaviour: - /cancel: unchanged — stop in-flight ACP turn only, queue continues. - /cancel-all: stop in-flight + drop every lane's buffer in the thread (was: invoker's lane only). The nuclear escape hatch — keeps ACP context, clears queued work so a human can intervene. - /reset: drop every lane's buffer + tear down the ACP session (was: invoker's lane only). Next message in the thread starts a fresh session. Gateway: - run_gateway_adapter now also receives the AdapterRouter, so the upstream /reset and /cancel slash-command interception (added on main while this branch was in review) compiles after rebase. - Gateway /reset gets the same all-lanes drop as Discord; /cancel keeps the in-flight-only semantics from upstream. - /cancel-all is intentionally not added to the gateway interception path. Tests: 227 passing (+3 new dispatcher tests covering PerThread drop, PerLane all-lanes drop, and the T1-vs-T10 prefix-collision guard). Co-Authored-By: Claude Opus 4.7 --- src/discord.rs | 25 +++++---------- src/dispatch.rs | 85 ++++++++++++++++++++++++++++++++++++++++++------- src/gateway.rs | 15 ++++++--- src/main.rs | 3 +- 4 files changed, 94 insertions(+), 34 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index 9f6acdcd..148b517c 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -886,18 +886,12 @@ impl Handler { ctx: &Context, cmd: &serenity::model::application::CommandInteraction, ) { - // Session pool key is always per-thread (B4-a: cancel the in-flight ACP turn even - // in PerLane mode — it's the only escape hatch from a runaway turn, and the - // shared session means there's only one in-flight turn anyway). + // /cancel-all is the nuclear escape hatch: stop the in-flight turn AND clear + // every lane's buffer in this thread, so a human can intervene from a clean slate. let session_key = format!("discord:{}", cmd.channel_id.get()); - // Dispatcher key follows the configured grouping. In PerLane mode this scopes - // the buffer drop to the invoker's own lane (B1=X). - let invoker_id = cmd.user.id.to_string(); - let dispatcher_key = self + let dropped = self .dispatcher - .key("discord", &cmd.channel_id.get().to_string(), &invoker_id); - - let dropped = self.dispatcher.cancel_buffered(&dispatcher_key); + .cancel_buffered_thread("discord", &cmd.channel_id.get().to_string()); let cancel_result = self.router.pool().cancel_session(&session_key).await; @@ -921,15 +915,12 @@ impl Handler { ctx: &Context, cmd: &serenity::model::application::CommandInteraction, ) { - // Session pool key is always per-thread (see /cancel-all for rationale). + // /reset clears every lane's buffer in this thread and tears down the shared + // ACP session — the next message in the thread starts a fresh conversation. let session_key = format!("discord:{}", cmd.channel_id.get()); - let invoker_id = cmd.user.id.to_string(); - let dispatcher_key = self + let dropped = self .dispatcher - .key("discord", &cmd.channel_id.get().to_string(), &invoker_id); - - // /reset is a superset of /cancel-all: drop buffered work, then tear down the session. 
- let dropped = self.dispatcher.cancel_buffered(&dispatcher_key); + .cancel_buffered_thread("discord", &cmd.channel_id.get().to_string()); let result = self.router.pool().reset_session(&session_key).await; diff --git a/src/dispatch.rs b/src/dispatch.rs index 02669cc1..ffbd1312 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -277,22 +277,34 @@ impl Dispatcher { Ok(()) } - /// Drop the per-thread handle and abort its consumer, discarding any messages - /// buffered at cancel time (§2.5 / §4.4). Returns the approximate number of - /// messages that were buffered (mpsc length at removal time). + /// Drop all per-thread handles whose key belongs to `(platform, thread_id)`, + /// regardless of grouping, and abort each consumer (§2.5 / §4.4). Returns + /// the total number of buffered messages discarded across all lanes. + /// + /// Matches both PerThread keys (`:`) and PerLane keys + /// (`::`). Used by `/reset` and + /// `/cancel-all` to clear the entire thread, not just one lane. /// /// Disjoint from SendError recovery: removal happens *before* abort, so any /// fresh `submit` after this returns lands on a lazily-constructed new handle /// instead of observing `SendError`. - pub fn cancel_buffered(&self, thread_key: &str) -> usize { + pub fn cancel_buffered_thread(&self, platform: &str, thread_id: &str) -> usize { + let prefix = format!("{platform}:{thread_id}"); + let lane_prefix = format!("{prefix}:"); let mut map = self.per_thread.lock().unwrap(); - if let Some(handle) = map.remove(thread_key) { - let pending = handle.pending_count(); - handle.consumer.abort(); - pending - } else { - 0 + let keys: Vec = map + .keys() + .filter(|k| k.as_str() == prefix || k.starts_with(&lane_prefix)) + .cloned() + .collect(); + let mut dropped = 0; + for k in keys { + if let Some(handle) = map.remove(&k) { + dropped += handle.pending_count(); + handle.consumer.abort(); + } } + dropped } /// §2.5 race-safe eviction. Caller must hold the `per_thread` mutex. @@ -379,7 +391,7 @@ async fn consumer_loop( None => match tokio::time::timeout(CONSUMER_IDLE_TIMEOUT, rx.recv()).await { Ok(Some(msg)) => msg, Ok(None) => { - // All senders dropped → shutdown() or cancel_buffered(). + // All senders dropped → shutdown() or cancel_buffered_thread(). break; } Err(_elapsed) => { @@ -882,4 +894,55 @@ mod tests { // Different threads remain distinct. 
assert_eq!(d.key("slack", "T2", "userA"), "slack:T2:userA"); } + + fn insert_dummy_handle(d: &Dispatcher, key: &str) { + let (tx, _rx) = tokio::sync::mpsc::channel::(10); + let consumer = tokio::spawn(async {}); + let handle = ThreadHandle { + tx, + consumer, + generation: 0, + channel_id: "c".into(), + adapter_kind: "discord".into(), + }; + d.per_thread.lock().unwrap().insert(key.to_string(), handle); + } + + #[tokio::test] + async fn cancel_buffered_thread_drops_per_thread_key() { + let d = make_dispatcher(BatchGrouping::PerThread); + insert_dummy_handle(&d, "discord:T1"); + insert_dummy_handle(&d, "discord:T2"); // different thread, must survive + assert_eq!(d.cancel_buffered_thread("discord", "T1"), 0); // no buffered msgs + let map = d.per_thread.lock().unwrap(); + assert!(!map.contains_key("discord:T1")); + assert!(map.contains_key("discord:T2")); + } + + #[tokio::test] + async fn cancel_buffered_thread_drops_all_lanes() { + let d = make_dispatcher(BatchGrouping::PerLane); + insert_dummy_handle(&d, "discord:T1:userA"); + insert_dummy_handle(&d, "discord:T1:userB"); + insert_dummy_handle(&d, "discord:T2:userA"); // different thread + insert_dummy_handle(&d, "slack:T1:userA"); // different platform + d.cancel_buffered_thread("discord", "T1"); + let map = d.per_thread.lock().unwrap(); + assert!(!map.contains_key("discord:T1:userA")); + assert!(!map.contains_key("discord:T1:userB")); + assert!(map.contains_key("discord:T2:userA")); + assert!(map.contains_key("slack:T1:userA")); + } + + #[tokio::test] + async fn cancel_buffered_thread_does_not_match_thread_id_prefix() { + // T1 must not match T10 / T11 (substring trap). + let d = make_dispatcher(BatchGrouping::PerLane); + insert_dummy_handle(&d, "discord:T1:userA"); + insert_dummy_handle(&d, "discord:T10:userA"); + d.cancel_buffered_thread("discord", "T1"); + let map = d.per_thread.lock().unwrap(); + assert!(!map.contains_key("discord:T1:userA")); + assert!(map.contains_key("discord:T10:userA")); + } } diff --git a/src/gateway.rs b/src/gateway.rs index 1502adc6..28799f1b 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -350,6 +350,7 @@ pub async fn run_gateway_adapter( params: GatewayParams, mut shutdown_rx: tokio::sync::watch::Receiver, dispatcher: Arc, + router: Arc, ) -> Result<()> { let platform: &'static str = Box::leak(params.platform.into_boxed_str()); @@ -514,12 +515,16 @@ pub async fn run_gateway_adapter( // need message_id for streaming edits. let trimmed = prompt.trim(); if trimmed == "/reset" { - let thread_key = format!("{}:{}", event.platform, event.channel.thread_id.as_deref().unwrap_or(&event.channel.id)); - let msg = match router.pool().reset_session(&thread_key).await { - Ok(()) => "🔄 Session reset. Start a new conversation!", - Err(_) => "⚠️ No active session to reset.", + let thread_id_str = event.channel.thread_id.as_deref().unwrap_or(&event.channel.id); + let thread_key = format!("{}:{}", event.platform, thread_id_str); + let dropped = dispatcher.cancel_buffered_thread(event.platform.as_str(), thread_id_str); + let msg = match (router.pool().reset_session(&thread_key).await, dropped) { + (Ok(()), 0) => "🔄 Session reset. Start a new conversation!".to_string(), + (Ok(()), n) => format!("🔄 Session reset. Dropped {n} buffered message(s). Start a new conversation!"), + (Err(_), 0) => "⚠️ No active session to reset.".to_string(), + (Err(_), n) => format!("🔄 Dropped {n} buffered message(s). 
No active session to reset."), }; - let _ = send_fire_and_forget(&slash_ws_tx, &channel, msg).await; + let _ = send_fire_and_forget(&slash_ws_tx, &channel, &msg).await; continue; } if trimmed == "/cancel" { diff --git a/src/main.rs b/src/main.rs index 84e3d871..a3be793e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -273,8 +273,9 @@ async fn main() -> anyhow::Result<()> { allowed_users: gw_cfg.allowed_users, streaming: gw_cfg.streaming, }; + let gw_router = router.clone(); Some(tokio::spawn(async move { - if let Err(e) = gateway::run_gateway_adapter(params, shutdown_rx, gw_dispatcher).await { + if let Err(e) = gateway::run_gateway_adapter(params, shutdown_rx, gw_dispatcher, gw_router).await { error!("gateway adapter error: {e}"); } })) From 2d2ce952224842bd9e07d96a835210d1002514d0 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 06:06:47 +0000 Subject: [PATCH 11/32] feat(config)!: drop "batched" alias, only per-message/per-thread/per-lane accepted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The legacy `"batched"` value (which resolved to PerLane on this branch) is removed. Configs using `message_processing_mode = "batched"` will now fail to parse with an `unknown variant "batched"` error pointing at the three accepted values, forcing an explicit migration to per-thread or per-lane. The two batching modes have meaningfully different semantics (shared vs isolated buffer per sender), so a silent default is the wrong call — users should pick deliberately. Co-Authored-By: Claude Opus 4.7 --- src/config.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/config.rs b/src/config.rs index 4511d494..5b1681e0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -11,8 +11,6 @@ use std::path::Path; /// produce one ACP turn per turn boundary. /// - `PerLane`: one buffer per (thread, sender); each sender batches independently and gets /// its own ACP turn — no silent-drop risk when multiple senders address the same thread. -/// -/// The legacy alias `"batched"` resolves to `PerLane` (the recommended batching default). #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub enum MessageProcessingMode { #[default] @@ -27,8 +25,7 @@ impl<'de> Deserialize<'de> for MessageProcessingMode { match s.to_lowercase().replace('-', "_").as_str() { "per_message" => Ok(Self::PerMessage), "per_thread" => Ok(Self::PerThread), - // "batched" is a legacy alias for PerLane — the new default batching mode. - "per_lane" | "batched" => Ok(Self::PerLane), + "per_lane" => Ok(Self::PerLane), other => Err(serde::de::Error::unknown_variant( other, &["per-message", "per-thread", "per-lane"], @@ -709,10 +706,10 @@ command = "echo" ); } - // Legacy alias: "batched" used to mean per-thread; it now resolves to PerLane - // (the recommended batching default). Existing configs continue to load. + // The legacy alias "batched" was removed: only per-message / per-thread / per-lane + // are accepted. Configs still using "batched" must migrate to an explicit value. 
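+    // On this branch the old "batched" value resolved to PerLane, so "per-lane" is the
+    // closest drop-in replacement when migrating existing configs.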
#[test] - fn message_processing_mode_batched_alias_is_per_lane() { + fn message_processing_mode_batched_is_rejected() { let toml = r#" [discord] bot_token = "t" @@ -721,11 +718,7 @@ message_processing_mode = "batched" [agent] command = "echo" "#; - let cfg = parse_config(toml, "test").unwrap(); - assert_eq!( - cfg.discord.unwrap().message_processing_mode, - MessageProcessingMode::PerLane - ); + assert!(parse_config(toml, "test").is_err()); } #[test] From 26570c42c5c5615146654527c6095608989d2a41 Mon Sep 17 00:00:00 2001 From: shaun-agent <265093149+shaun-agent@users.noreply.github.com> Date: Mon, 4 May 2026 06:31:30 +0000 Subject: [PATCH 12/32] fix(dispatch): restore shared thread sessions and abort consumers on shutdown Co-authored-by: Brett Chien <1193046+brettchien@users.noreply.github.com> --- src/dispatch.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index ffbd1312..7d923f90 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -155,6 +155,18 @@ impl Dispatcher { } } + /// Build the shared session pool key for a routed channel. + /// + /// Unlike dispatcher keys, session keys never include sender identity. + /// They track the logical conversation thread across all grouping modes. + fn session_key(thread_channel: &ChannelRef) -> String { + let logical_thread_id = thread_channel + .thread_id + .as_deref() + .unwrap_or(&thread_channel.channel_id); + format!("{}:{}", thread_channel.platform, logical_thread_id) + } + /// Submit one arrival event for the given thread. /// /// - If the thread has no active consumer, one is spawned lazily. @@ -351,6 +363,7 @@ impl Dispatcher { "shutdown dropped pending messages without dispatch", ); } + handle.consumer.abort(); } map.clear(); } @@ -453,6 +466,7 @@ async fn dispatch_batch( ) { let dispatch_start = Instant::now(); let batch_size = batch.len(); + let session_key = Dispatcher::session_key(thread_channel); // Apply 👀 reaction to every message in the batch before dispatch (§6.7). // Parallelized so first-token latency isn't paid for N serial reaction RPCs. @@ -486,7 +500,7 @@ async fn dispatch_batch( let packed_block_count = content_blocks.len(); // Ensure session exists. - if let Err(e) = router.pool().get_or_create(thread_key).await { + if let Err(e) = router.pool().get_or_create(&session_key).await { let user_msg = format_user_error(&e.to_string()); let _ = adapter .send_message(thread_channel, &format!("⚠️ {user_msg}")) @@ -508,7 +522,7 @@ async fn dispatch_batch( let result = router .stream_prompt_blocks( adapter, - thread_key, + &session_key, content_blocks, thread_channel, reactions.clone(), From f7bd04457d1a7bc624c756c3dee8e10771b622f7 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 06:34:26 +0000 Subject: [PATCH 13/32] feat(chart): expose message_processing_mode and batching params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds messageProcessingMode / maxBufferedMessages / maxBatchTokens to the Discord, Slack, and Gateway sections of the chart. Without these the turn-boundary batching modes shipped in PR #686 are unreachable from a helm-deployed instance — the Rust binary just falls back to per-message. - configmap.yaml: render the three keys for each platform when set, with enum validation matching the Rust deserializer ("must be one of: per-message, per-thread, per-lane"). - values.yaml: commented examples for each platform. 
- tests/message-processing-mode_test.yaml: 12 helm-unittest cases covering render, enum rejection, omit-when-unset, and numeric param render across all three platforms. Co-Authored-By: Claude Opus 4.7 --- charts/openab/templates/configmap.yaml | 39 ++++++ .../tests/message-processing-mode_test.yaml | 124 ++++++++++++++++++ charts/openab/values.yaml | 24 ++++ 3 files changed, 187 insertions(+) create mode 100644 charts/openab/tests/message-processing-mode_test.yaml diff --git a/charts/openab/templates/configmap.yaml b/charts/openab/templates/configmap.yaml index 78316b59..2cff6218 100644 --- a/charts/openab/templates/configmap.yaml +++ b/charts/openab/templates/configmap.yaml @@ -58,6 +58,19 @@ data: {{- if hasKey ($cfg.discord | default dict) "maxBotTurns" }} max_bot_turns = {{ ($cfg.discord).maxBotTurns | int }} {{- end }} + {{- /* messageProcessingMode: per-message (default) | per-thread | per-lane (turn-boundary batching) */ -}} + {{- if hasKey ($cfg.discord | default dict) "messageProcessingMode" }} + {{- if not (has ($cfg.discord).messageProcessingMode (list "per-message" "per-thread" "per-lane")) }} + {{- fail (printf "agents.%s.discord.messageProcessingMode must be one of: per-message, per-thread, per-lane — got: %s" $name ($cfg.discord).messageProcessingMode) }} + {{- end }} + message_processing_mode = {{ ($cfg.discord).messageProcessingMode | toJson }} + {{- end }} + {{- if hasKey ($cfg.discord | default dict) "maxBufferedMessages" }} + max_buffered_messages = {{ ($cfg.discord).maxBufferedMessages | int }} + {{- end }} + {{- if hasKey ($cfg.discord | default dict) "maxBatchTokens" }} + max_batch_tokens = {{ ($cfg.discord).maxBatchTokens | int }} + {{- end }} {{- end }} {{- if and ($cfg.slack).enabled }} @@ -97,6 +110,19 @@ data: {{- if hasKey ($cfg.slack | default dict) "maxBotTurns" }} max_bot_turns = {{ ($cfg.slack).maxBotTurns | int }} {{- end }} + {{- /* messageProcessingMode: per-message (default) | per-thread | per-lane (turn-boundary batching) */ -}} + {{- if hasKey ($cfg.slack | default dict) "messageProcessingMode" }} + {{- if not (has ($cfg.slack).messageProcessingMode (list "per-message" "per-thread" "per-lane")) }} + {{- fail (printf "agents.%s.slack.messageProcessingMode must be one of: per-message, per-thread, per-lane — got: %s" $name ($cfg.slack).messageProcessingMode) }} + {{- end }} + message_processing_mode = {{ ($cfg.slack).messageProcessingMode | toJson }} + {{- end }} + {{- if hasKey ($cfg.slack | default dict) "maxBufferedMessages" }} + max_buffered_messages = {{ ($cfg.slack).maxBufferedMessages | int }} + {{- end }} + {{- if hasKey ($cfg.slack | default dict) "maxBatchTokens" }} + max_batch_tokens = {{ ($cfg.slack).maxBatchTokens | int }} + {{- end }} {{- end }} [agent] @@ -162,6 +188,19 @@ data: {{- end }} {{- end }} allowed_users = {{ ($cfg.gateway).allowedUsers | default list | toJson }} + {{- /* messageProcessingMode: per-message (default) | per-thread | per-lane (turn-boundary batching) */ -}} + {{- if hasKey ($cfg.gateway | default dict) "messageProcessingMode" }} + {{- if not (has ($cfg.gateway).messageProcessingMode (list "per-message" "per-thread" "per-lane")) }} + {{- fail (printf "agents.%s.gateway.messageProcessingMode must be one of: per-message, per-thread, per-lane — got: %s" $name ($cfg.gateway).messageProcessingMode) }} + {{- end }} + message_processing_mode = {{ ($cfg.gateway).messageProcessingMode | toJson }} + {{- end }} + {{- if hasKey ($cfg.gateway | default dict) "maxBufferedMessages" }} + max_buffered_messages = {{ 
($cfg.gateway).maxBufferedMessages | int }} + {{- end }} + {{- if hasKey ($cfg.gateway | default dict) "maxBatchTokens" }} + max_batch_tokens = {{ ($cfg.gateway).maxBatchTokens | int }} + {{- end }} {{- end }} {{- if or ($cfg.cronjobs) (($cfg.cron).usercronEnabled) (($cfg.cron).usercronPath) }} diff --git a/charts/openab/tests/message-processing-mode_test.yaml b/charts/openab/tests/message-processing-mode_test.yaml new file mode 100644 index 00000000..f70fde5b --- /dev/null +++ b/charts/openab/tests/message-processing-mode_test.yaml @@ -0,0 +1,124 @@ +suite: messageProcessingMode & batching params +templates: + - templates/configmap.yaml +tests: + - it: discord renders message_processing_mode = "per-lane" + set: + agents.kiro.discord.messageProcessingMode: per-lane + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'message_processing_mode = "per-lane"' + + - it: discord renders message_processing_mode = "per-thread" + set: + agents.kiro.discord.messageProcessingMode: per-thread + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'message_processing_mode = "per-thread"' + + - it: discord renders message_processing_mode = "per-message" + set: + agents.kiro.discord.messageProcessingMode: per-message + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'message_processing_mode = "per-message"' + + - it: discord rejects invalid messageProcessingMode value + set: + agents.kiro.discord.messageProcessingMode: batched + asserts: + - failedTemplate: + errorPattern: "must be one of: per-message, per-thread, per-lane" + + - it: discord omits message_processing_mode when not set + asserts: + - notMatchRegex: + path: data["config.toml"] + pattern: 'message_processing_mode' + + - it: discord renders maxBufferedMessages and maxBatchTokens + set: + agents.kiro.discord.messageProcessingMode: per-lane + agents.kiro.discord.maxBufferedMessages: 25 + agents.kiro.discord.maxBatchTokens: 32000 + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'max_buffered_messages = 25' + - matchRegex: + path: data["config.toml"] + pattern: 'max_batch_tokens = 32000' + + - it: slack renders message_processing_mode = "per-thread" + set: + agents.kiro.slack.enabled: true + agents.kiro.slack.botToken: xoxb-x + agents.kiro.slack.appToken: xapp-x + agents.kiro.slack.messageProcessingMode: per-thread + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'message_processing_mode = "per-thread"' + + - it: slack rejects invalid messageProcessingMode value + set: + agents.kiro.slack.enabled: true + agents.kiro.slack.botToken: xoxb-x + agents.kiro.slack.appToken: xapp-x + agents.kiro.slack.messageProcessingMode: batched + asserts: + - failedTemplate: + errorPattern: "must be one of: per-message, per-thread, per-lane" + + - it: slack renders maxBufferedMessages and maxBatchTokens + set: + agents.kiro.slack.enabled: true + agents.kiro.slack.botToken: xoxb-x + agents.kiro.slack.appToken: xapp-x + agents.kiro.slack.messageProcessingMode: per-lane + agents.kiro.slack.maxBufferedMessages: 15 + agents.kiro.slack.maxBatchTokens: 18000 + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'max_buffered_messages = 15' + - matchRegex: + path: data["config.toml"] + pattern: 'max_batch_tokens = 18000' + + - it: gateway renders message_processing_mode = "per-lane" + set: + agents.kiro.gateway.enabled: true + agents.kiro.gateway.url: ws://openab-gateway:8080/ws + agents.kiro.gateway.messageProcessingMode: per-lane + asserts: + - matchRegex: + path: data["config.toml"] + 
pattern: 'message_processing_mode = "per-lane"' + + - it: gateway rejects invalid messageProcessingMode value + set: + agents.kiro.gateway.enabled: true + agents.kiro.gateway.url: ws://openab-gateway:8080/ws + agents.kiro.gateway.messageProcessingMode: batched + asserts: + - failedTemplate: + errorPattern: "must be one of: per-message, per-thread, per-lane" + + - it: gateway renders maxBufferedMessages and maxBatchTokens + set: + agents.kiro.gateway.enabled: true + agents.kiro.gateway.url: ws://openab-gateway:8080/ws + agents.kiro.gateway.messageProcessingMode: per-thread + agents.kiro.gateway.maxBufferedMessages: 50 + agents.kiro.gateway.maxBatchTokens: 12000 + asserts: + - matchRegex: + path: data["config.toml"] + pattern: 'max_buffered_messages = 50' + - matchRegex: + path: data["config.toml"] + pattern: 'max_batch_tokens = 12000' diff --git a/charts/openab/values.yaml b/charts/openab/values.yaml index 4f12e315..e81faf9c 100644 --- a/charts/openab/values.yaml +++ b/charts/openab/values.yaml @@ -151,6 +151,14 @@ agents: # multi-agent collaborations; lower to throttle runaway loops more # aggressively. Hard cap remains 100 regardless (compiled-in). # maxBotTurns: 20 + # messageProcessingMode: "per-message" (default) | "per-thread" | "per-lane" + # per-thread: all senders in a thread share one batch → one ACP turn per turn boundary + # per-lane: each (thread, sender) batches independently → no silent-drop risk + # messageProcessingMode: "per-lane" + # maxBufferedMessages: per-thread mpsc capacity for batching modes (default 10) + # maxBufferedMessages: 10 + # maxBatchTokens: soft token cap per ACP turn for batching modes (default 24000) + # maxBatchTokens: 24000 slack: enabled: false botToken: "" # Bot User OAuth Token (xoxb-...) @@ -175,6 +183,14 @@ agents: # multi-agent collaborations; lower to throttle runaway loops more # aggressively. Hard cap remains 100 regardless (compiled-in). 
# maxBotTurns: 20 + # messageProcessingMode: "per-message" (default) | "per-thread" | "per-lane" + # per-thread: all senders in a thread share one batch → one ACP turn per turn boundary + # per-lane: each (thread, sender) batches independently → no silent-drop risk + # messageProcessingMode: "per-lane" + # maxBufferedMessages: per-thread mpsc capacity for batching modes (default 10) + # maxBufferedMessages: 10 + # maxBatchTokens: soft token cap per ACP turn for batching modes (default 24000) + # maxBatchTokens: 24000 workingDir: /home/agent env: {} envFrom: [] @@ -200,6 +216,14 @@ agents: platform: "telegram" # default platform when gateway is enabled token: "" # optional shared secret (injected via GATEWAY_WS_TOKEN env var) botUsername: "" # optional, for @mention gating + # messageProcessingMode: "per-message" (default) | "per-thread" | "per-lane" + # per-thread: all senders in a thread share one batch → one ACP turn per turn boundary + # per-lane: each (thread, sender) batches independently → no silent-drop risk + # messageProcessingMode: "per-lane" + # maxBufferedMessages: per-thread mpsc capacity for batching modes (default 10) + # maxBufferedMessages: 10 + # maxBatchTokens: soft token cap per ACP turn for batching modes (default 24000) + # maxBatchTokens: 24000 image: "ghcr.io/openabdev/openab-gateway" # gateway container image tag: "" # defaults to Chart.AppVersion strategy: "Recreate" # Recreate (default, prevents concurrent WS conflicts) or RollingUpdate From 436587c7698950ad7c2868c40c03e59697e6f12b Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 08:41:40 +0000 Subject: [PATCH 14/32] refactor: rename enum variants to drop redundant Per prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns MessageProcessingMode and BatchGrouping with the rest of the codebase (TableMode, AllowBots, ToolDisplay, TurnSeverity, etc.) where variants don't repeat the enum-name-derived prefix. Also fixes the CI clippy::enum_variant_names failure on PR #686. Wire format unchanged — manual Deserialize still matches per-message / per-thread / per-lane strings; helm chart and TOML configs need no edits. - MessageProcessingMode { PerMessage, PerThread, PerLane } -> { Message, Thread, Lane } - BatchGrouping { PerThread, PerLane } -> { Thread, Lane } --- src/config.rs | 26 +++++++++++++------------- src/discord.rs | 2 +- src/dispatch.rs | 26 +++++++++++++------------- src/main.rs | 40 ++++++++++++++++++++-------------------- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/config.rs b/src/config.rs index 5b1681e0..afe43943 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,26 +6,26 @@ use std::path::Path; /// Controls how incoming messages are dispatched to ACP turns. /// -/// - `PerMessage` (default): each message becomes its own ACP turn (v0.8.2-beta.1 behaviour). -/// - `PerThread`: one buffer per thread; all senders in a thread share a single batch and +/// - `Message` (default): each message becomes its own ACP turn (v0.8.2-beta.1 behaviour). +/// - `Thread`: one buffer per thread; all senders in a thread share a single batch and /// produce one ACP turn per turn boundary. -/// - `PerLane`: one buffer per (thread, sender); each sender batches independently and gets +/// - `Lane`: one buffer per (thread, sender); each sender batches independently and gets /// its own ACP turn — no silent-drop risk when multiple senders address the same thread. 
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub enum MessageProcessingMode { #[default] - PerMessage, - PerThread, - PerLane, + Message, + Thread, + Lane, } impl<'de> Deserialize<'de> for MessageProcessingMode { fn deserialize>(deserializer: D) -> Result { let s = String::deserialize(deserializer)?; match s.to_lowercase().replace('-', "_").as_str() { - "per_message" => Ok(Self::PerMessage), - "per_thread" => Ok(Self::PerThread), - "per_lane" => Ok(Self::PerLane), + "per_message" => Ok(Self::Message), + "per_thread" => Ok(Self::Thread), + "per_lane" => Ok(Self::Lane), other => Err(serde::de::Error::unknown_variant( other, &["per-message", "per-thread", "per-lane"], @@ -668,7 +668,7 @@ command = "echo" let cfg = parse_config(toml, "test").unwrap(); assert_eq!( cfg.discord.unwrap().message_processing_mode, - MessageProcessingMode::PerMessage + MessageProcessingMode::Message ); } @@ -685,7 +685,7 @@ command = "echo" let cfg = parse_config(toml, "test").unwrap(); assert_eq!( cfg.discord.unwrap().message_processing_mode, - MessageProcessingMode::PerThread + MessageProcessingMode::Thread ); } @@ -702,7 +702,7 @@ command = "echo" let cfg = parse_config(toml, "test").unwrap(); assert_eq!( cfg.discord.unwrap().message_processing_mode, - MessageProcessingMode::PerLane + MessageProcessingMode::Lane ); } @@ -726,7 +726,7 @@ command = "echo" let cfg = parse_config(MINIMAL_TOML, "test").unwrap(); assert_eq!( cfg.discord.unwrap().message_processing_mode, - MessageProcessingMode::PerMessage + MessageProcessingMode::Message ); } diff --git a/src/discord.rs b/src/discord.rs index 148b517c..6bc224bd 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -160,7 +160,7 @@ pub struct Handler { pub bot_turns: tokio::sync::Mutex, /// Allow the bot to respond to Discord DMs. pub allow_dm: bool, - /// Per-thread dispatcher (PerMessage mode uses cap=1 for FIFO; Batched uses configured cap). + /// Per-thread dispatcher (Message mode uses cap=1 for FIFO; Thread/Lane use configured cap). pub dispatcher: Arc, } diff --git a/src/dispatch.rs b/src/dispatch.rs index 7d923f90..7e21da55 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -50,18 +50,18 @@ pub struct BufferedMessage { /// How `thread_key` is built for the dispatcher's per-thread map. /// -/// - `PerThread`: one mpsc per thread → all senders in a thread share one batch → one +/// - `Thread`: one mpsc per thread → all senders in a thread share one batch → one /// ACP turn per batch (cheaper, but risks silent drop when the agent's single reply /// forgets to address some senders). -/// - `PerLane`: one mpsc per (thread, sender) → each sender batches independently and +/// - `Lane`: one mpsc per (thread, sender) → each sender batches independently and /// gets a dedicated ACP turn. Sessions are still shared per-thread; turns serialise /// through the shared session. /// /// Derived from `config::MessageProcessingMode` in `main.rs`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BatchGrouping { - PerThread, - PerLane, + Thread, + Lane, } /// Error returned by `Dispatcher::submit`. @@ -142,7 +142,7 @@ impl Dispatcher { /// Build the dispatcher key for a (platform, thread, sender) tuple. /// - /// In `PerThread` mode the sender is ignored; in `PerLane` mode the sender is appended + /// In `Thread` mode the sender is ignored; in `Lane` mode the sender is appended /// so each (thread, sender) pair gets its own mpsc and consumer. /// /// Note: this is the *dispatcher* key, not the *session pool* key. 
Session pool keys @@ -150,8 +150,8 @@ impl Dispatcher { /// shared per-thread by design). pub fn key(&self, platform: &str, thread_id: &str, sender_id: &str) -> String { match self.grouping { - BatchGrouping::PerThread => format!("{platform}:{thread_id}"), - BatchGrouping::PerLane => format!("{platform}:{thread_id}:{sender_id}"), + BatchGrouping::Thread => format!("{platform}:{thread_id}"), + BatchGrouping::Lane => format!("{platform}:{thread_id}:{sender_id}"), } } @@ -293,7 +293,7 @@ impl Dispatcher { /// regardless of grouping, and abort each consumer (§2.5 / §4.4). Returns /// the total number of buffered messages discarded across all lanes. /// - /// Matches both PerThread keys (`:`) and PerLane keys + /// Matches both Thread keys (`:`) and Lane keys /// (`::`). Used by `/reset` and /// `/cancel-all` to clear the entire thread, not just one lane. /// @@ -895,14 +895,14 @@ mod tests { #[tokio::test] async fn key_per_thread_ignores_sender() { - let d = make_dispatcher(BatchGrouping::PerThread); + let d = make_dispatcher(BatchGrouping::Thread); assert_eq!(d.key("discord", "T1", "userA"), "discord:T1"); assert_eq!(d.key("discord", "T1", "userB"), "discord:T1"); } #[tokio::test] async fn key_per_lane_includes_sender() { - let d = make_dispatcher(BatchGrouping::PerLane); + let d = make_dispatcher(BatchGrouping::Lane); assert_eq!(d.key("discord", "T1", "userA"), "discord:T1:userA"); assert_eq!(d.key("discord", "T1", "userB"), "discord:T1:userB"); // Different threads remain distinct. @@ -924,7 +924,7 @@ mod tests { #[tokio::test] async fn cancel_buffered_thread_drops_per_thread_key() { - let d = make_dispatcher(BatchGrouping::PerThread); + let d = make_dispatcher(BatchGrouping::Thread); insert_dummy_handle(&d, "discord:T1"); insert_dummy_handle(&d, "discord:T2"); // different thread, must survive assert_eq!(d.cancel_buffered_thread("discord", "T1"), 0); // no buffered msgs @@ -935,7 +935,7 @@ mod tests { #[tokio::test] async fn cancel_buffered_thread_drops_all_lanes() { - let d = make_dispatcher(BatchGrouping::PerLane); + let d = make_dispatcher(BatchGrouping::Lane); insert_dummy_handle(&d, "discord:T1:userA"); insert_dummy_handle(&d, "discord:T1:userB"); insert_dummy_handle(&d, "discord:T2:userA"); // different thread @@ -951,7 +951,7 @@ mod tests { #[tokio::test] async fn cancel_buffered_thread_does_not_match_thread_id_prefix() { // T1 must not match T10 / T11 (substring trap). - let d = make_dispatcher(BatchGrouping::PerLane); + let d = make_dispatcher(BatchGrouping::Lane); insert_dummy_handle(&d, "discord:T1:userA"); insert_dummy_handle(&d, "discord:T10:userA"); d.cancel_buffered_thread("discord", "T1"); diff --git a/src/main.rs b/src/main.rs index a3be793e..6372b680 100644 --- a/src/main.rs +++ b/src/main.rs @@ -199,16 +199,16 @@ async fn main() -> anyhow::Result<()> { let max_bot_turns = slack_cfg.max_bot_turns; let slack_shutdown_rx = shutdown_rx.clone(); let adapter = shared_slack_adapter.clone().expect("shared_slack_adapter must exist when slack config is present"); - // Dispatcher is the sole serialization path for all modes. PerMessage = cap 1 - // (each message dispatches alone, FIFO). PerThread / PerLane = configured cap; + // Dispatcher is the sole serialization path for all modes. Message = cap 1 + // (each message dispatches alone, FIFO). Thread / Lane = configured cap; // grouping decides whether senders share a buffer or get their own lane. 
let (slack_cap, slack_grouping) = match slack_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => - (1, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerThread => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerLane => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), + config::MessageProcessingMode::Message => + (1, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Thread => + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Lane => + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), }; let slack_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), @@ -248,12 +248,12 @@ async fn main() -> anyhow::Result<()> { let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); let (gw_cap, gw_grouping) = match gw_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => - (1, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerThread => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerLane => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), + config::MessageProcessingMode::Message => + (1, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Thread => + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Lane => + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), }; let gw_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), @@ -347,12 +347,12 @@ async fn main() -> anyhow::Result<()> { ); let (discord_cap, discord_grouping) = match discord_cfg.message_processing_mode { - config::MessageProcessingMode::PerMessage => - (1, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerThread => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::PerThread), - config::MessageProcessingMode::PerLane => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::PerLane), + config::MessageProcessingMode::Message => + (1, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Thread => + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + config::MessageProcessingMode::Lane => + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), }; let discord_dispatcher = Arc::new(dispatch::Dispatcher::new( router.clone(), From 991f71fb3d6f8367f368c9e470bf0a294878c21d Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 10:24:40 +0000 Subject: [PATCH 15/32] feat(config): validate max_batch_tokens > 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting max_batch_tokens=0 doesn't crash but forces every batch to size 1 via the consumer loop's token-cap check — functionally per-message mode through a confusing path. Reject it at config parse time, alongside the existing max_buffered_messages > 0 check. 
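For illustration (a hedged sketch, not part of the diff below): a config like
this now fails to parse with "discord.max_batch_tokens must be > 0" instead of
silently behaving like per-message mode:

    [discord]
    bot_token = "t"
    message_processing_mode = "per-thread"
    max_batch_tokens = 0

    [agent]
    command = "echo"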
Co-Authored-By: Claude Opus 4.7 --- src/config.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index afe43943..fff52736 100644 --- a/src/config.rs +++ b/src/config.rs @@ -512,15 +512,20 @@ fn parse_config(raw: &str, source: &str) -> anyhow::Result { let config: Config = toml::from_str(&expanded) .map_err(|e| anyhow::anyhow!("failed to parse config from {source}: {e}"))?; - // Validate max_buffered_messages > 0 (tokio::sync::mpsc::channel panics on 0). + // Validate max_buffered_messages > 0 (tokio::sync::mpsc::channel panics on 0) + // and max_batch_tokens > 0 (otherwise the consumer's token-cap check forces every + // batch to size 1 — functionally per-message via a confusing path). if let Some(ref d) = config.discord { anyhow::ensure!(d.max_buffered_messages > 0, "discord.max_buffered_messages must be > 0"); + anyhow::ensure!(d.max_batch_tokens > 0, "discord.max_batch_tokens must be > 0"); } if let Some(ref s) = config.slack { anyhow::ensure!(s.max_buffered_messages > 0, "slack.max_buffered_messages must be > 0"); + anyhow::ensure!(s.max_batch_tokens > 0, "slack.max_batch_tokens must be > 0"); } if let Some(ref g) = config.gateway { anyhow::ensure!(g.max_buffered_messages > 0, "gateway.max_buffered_messages must be > 0"); + anyhow::ensure!(g.max_batch_tokens > 0, "gateway.max_batch_tokens must be > 0"); } Ok(config) From 5e413a2545c12578109166b94c106791235a0c78 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 10:24:48 +0000 Subject: [PATCH 16/32] test(dispatch): cover sweep_stale and shutdown Add an alive_consumer_handle helper (parks on pending::<()>) and four unit tests: - sweep_stale removes finished consumers, leaves running ones alone - shutdown clears the per-thread map and aborts running consumers (verified via abort_handle().is_finished() after a runtime tick) These paths are simple but safety-critical (SIGTERM cleanup + idle-task GC); the existing dummy_handle / make_dispatcher scaffolding already covers the test surface, so no new mocks needed. Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/src/dispatch.rs b/src/dispatch.rs index 7e21da55..d88cf454 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -959,4 +959,74 @@ mod tests { assert!(!map.contains_key("discord:T1:userA")); assert!(map.contains_key("discord:T10:userA")); } + + // Long-running consumer that parks until aborted — used by sweep_stale / + // shutdown tests to exercise the "still alive" path. + fn alive_consumer_handle() -> ThreadHandle { + let (tx, _rx) = tokio::sync::mpsc::channel::(10); + let consumer = tokio::spawn(async { + std::future::pending::<()>().await; + }); + ThreadHandle { + tx, + consumer, + generation: 0, + channel_id: "c".into(), + adapter_kind: "discord".into(), + } + } + + #[tokio::test] + async fn sweep_stale_removes_finished_consumers() { + let d = make_dispatcher(BatchGrouping::Thread); + insert_dummy_handle(&d, "discord:T1"); + insert_dummy_handle(&d, "discord:T2"); + // Yield so the empty-body spawned tasks actually run to completion + // before is_finished() is checked. 
+ tokio::time::sleep(Duration::from_millis(10)).await; + let swept = d.sweep_stale(); + assert_eq!(swept, 2); + assert!(d.per_thread.lock().unwrap().is_empty()); + } + + #[tokio::test] + async fn sweep_stale_keeps_running_consumers() { + let d = make_dispatcher(BatchGrouping::Thread); + let abort = { + let h = alive_consumer_handle(); + let a = h.consumer.abort_handle(); + d.per_thread.lock().unwrap().insert("alive".into(), h); + a + }; + let swept = d.sweep_stale(); + assert_eq!(swept, 0); + assert!(d.per_thread.lock().unwrap().contains_key("alive")); + // Cleanup so the parked task doesn't linger across tests. + abort.abort(); + } + + #[tokio::test] + async fn shutdown_clears_all_handles() { + let d = make_dispatcher(BatchGrouping::Thread); + insert_dummy_handle(&d, "k1"); + insert_dummy_handle(&d, "k2"); + insert_dummy_handle(&d, "k3"); + d.shutdown(); + assert!(d.per_thread.lock().unwrap().is_empty()); + } + + #[tokio::test] + async fn shutdown_aborts_running_consumers() { + let d = make_dispatcher(BatchGrouping::Thread); + let abort = { + let h = alive_consumer_handle(); + let a = h.consumer.abort_handle(); + d.per_thread.lock().unwrap().insert("k".into(), h); + a + }; + d.shutdown(); + // Give the runtime a tick to process abort + map drop. + tokio::time::sleep(Duration::from_millis(10)).await; + assert!(abort.is_finished()); + } } From 98c2086712623801efb090d8a3b59f5928670faf Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Mon, 4 May 2026 11:43:24 +0000 Subject: [PATCH 17/32] test(dispatch): cover consumer_loop via DispatchTarget trait seam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the NIT 2 gap from PR #686 review. The consumer_loop orchestration (greedy drain / token cap overflow / idle timeout / SendError eviction) was previously only verified by manual staging smoke. The trait seam also unblocks the §2.5 SendError end-to-end test. Refactor: - DispatchTarget trait (reactions_config / ensure_session / stream_prompt_blocks) extracted from AdapterRouter's surface. AdapterRouter implements it by delegation. - Dispatcher now holds Arc. Production callsites unchanged — Arc auto-coerces via CoerceUnsized. - Add Dispatcher::with_idle_timeout (test knob); Dispatcher::new keeps the DEFAULT_CONSUMER_IDLE_TIMEOUT (5 min) production default. Tests: - MockDispatchTarget records dispatches; MockChatAdapter is a no-op stub. 
- consumer_dispatches_single_message_as_one_batch (happy path) - consumer_greedy_drain_combines_queued_messages_into_one_batch (3 pre-loaded msgs → 1 dispatch with 3 ContentBlocks) - consumer_token_cap_splits_batch_preserving_fifo (2x 80-token msgs + cap=100 → 2 FIFO dispatches) - consumer_exits_after_idle_timeout_with_no_messages (50ms timeout) - submit_evicts_dead_handle_and_retries_with_fresh_consumer (manufactured dead handle: rx dropped, consumer parked → SendError → eviction + retry on fresh consumer) Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 399 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 374 insertions(+), 25 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index d88cf454..0aa824aa 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -14,10 +14,12 @@ use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; use anyhow::Result; +use async_trait::async_trait; use tracing::{debug, error, info, info_span, warn}; use crate::adapter::{AdapterRouter, ChannelRef, ChatAdapter, MessageRef}; use crate::acp::ContentBlock; +use crate::config::ReactionsConfig; use crate::error_display::format_user_error; use crate::reactions::StatusReactionController; @@ -102,10 +104,77 @@ impl ThreadHandle { } } +// --------------------------------------------------------------------------- +// DispatchTarget — trait seam between Dispatcher and AdapterRouter +// --------------------------------------------------------------------------- + +/// Surface that `consumer_loop` / `dispatch_batch` need from the underlying +/// router. Extracted as a trait so the dispatcher can be unit-tested without +/// spinning up a real `SessionPool` (which forks ACP CLI subprocesses). +/// `AdapterRouter` is the production implementor; tests use a mock that +/// records calls. +#[async_trait] +pub trait DispatchTarget: Send + Sync + 'static { + fn reactions_config(&self) -> &ReactionsConfig; + + /// Ensure the ACP session for `session_key` exists (idempotent). + async fn ensure_session(&self, session_key: &str) -> Result<()>; + + /// Drive one ACP turn with the pre-packed `content_blocks`. + #[allow(clippy::too_many_arguments)] + async fn stream_prompt_blocks( + &self, + adapter: &Arc, + session_key: &str, + content_blocks: Vec, + thread_channel: &ChannelRef, + reactions: Arc, + other_bot_present: bool, + ) -> Result<()>; +} + +#[async_trait] +impl DispatchTarget for AdapterRouter { + fn reactions_config(&self) -> &ReactionsConfig { + AdapterRouter::reactions_config(self) + } + + async fn ensure_session(&self, session_key: &str) -> Result<()> { + self.pool().get_or_create(session_key).await + } + + async fn stream_prompt_blocks( + &self, + adapter: &Arc, + session_key: &str, + content_blocks: Vec, + thread_channel: &ChannelRef, + reactions: Arc, + other_bot_present: bool, + ) -> Result<()> { + AdapterRouter::stream_prompt_blocks( + self, + adapter, + session_key, + content_blocks, + thread_channel, + reactions, + other_bot_present, + ) + .await + } +} + // --------------------------------------------------------------------------- // Dispatcher // --------------------------------------------------------------------------- +/// Default idle timeout for per-thread consumer tasks. When no message arrives +/// within this window the consumer exits, allowing `per_thread` map cleanup on +/// the next `submit` (via `SendError` → `try_evict_locked`). Prevents unbounded +/// task/memory growth from one-shot thread keys (e.g. Slack non-thread messages). 
+pub const DEFAULT_CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(300); + /// Per-thread message dispatcher for batched mode. /// /// Constructed once in `main.rs` and shared via `Arc`. Platform adapters call @@ -117,26 +186,46 @@ pub struct Dispatcher { /// every `submit` and consumed only when a fresh handle is inserted; wasted /// values are fine because generations need only be monotonic, not contiguous. next_generation: AtomicU64, - router: Arc, + target: Arc, max_buffered_messages: usize, max_batch_tokens: usize, grouping: BatchGrouping, + idle_timeout: Duration, } impl Dispatcher { pub fn new( - router: Arc, + target: Arc, + max_buffered_messages: usize, + max_batch_tokens: usize, + grouping: BatchGrouping, + ) -> Self { + Self::with_idle_timeout( + target, + max_buffered_messages, + max_batch_tokens, + grouping, + DEFAULT_CONSUMER_IDLE_TIMEOUT, + ) + } + + /// Like `new`, but with a custom consumer idle timeout. Test-only knob — + /// production code should use `new` (which applies `DEFAULT_CONSUMER_IDLE_TIMEOUT`). + pub fn with_idle_timeout( + target: Arc, max_buffered_messages: usize, max_batch_tokens: usize, grouping: BatchGrouping, + idle_timeout: Duration, ) -> Self { Self { per_thread: Mutex::new(HashMap::new()), next_generation: AtomicU64::new(0), - router, + target, max_buffered_messages, max_batch_tokens, grouping, + idle_timeout, } } @@ -186,8 +275,9 @@ impl Dispatcher { msg: BufferedMessage, ) -> Result<(), DispatchError> { let cap = self.max_buffered_messages; - let router = Arc::clone(&self.router); + let target = Arc::clone(&self.target); let max_tokens = self.max_batch_tokens; + let idle_timeout = self.idle_timeout; // Pre-fetch a generation in case we end up inserting a fresh handle. // Wasted if the entry already exists; generations need only be monotonic. @@ -212,10 +302,11 @@ impl Dispatcher { thread_key.clone(), thread_channel.clone(), rx, - Arc::clone(&router), + Arc::clone(&target), Arc::clone(&adapter), cap, max_tokens, + idle_timeout, )); ThreadHandle { tx, @@ -248,10 +339,11 @@ impl Dispatcher { thread_key.clone(), thread_channel.clone(), rx, - Arc::clone(&router), + Arc::clone(&target), Arc::clone(&adapter), cap, max_tokens, + idle_timeout, )); ThreadHandle { tx, @@ -274,7 +366,7 @@ impl Dispatcher { let _ = adapter .add_reaction( &failed_msg.trigger_msg, - &self.router.reactions_config().emojis.error, + &self.target.reactions_config().emojis.error, ) .await; let _ = adapter @@ -373,21 +465,16 @@ impl Dispatcher { // consumer_loop // --------------------------------------------------------------------------- -/// Idle timeout for per-thread consumer tasks. When no message arrives within -/// this window the consumer exits, allowing `per_thread` map cleanup on the -/// next `submit` (via `SendError` → `try_evict_locked`). Prevents unbounded -/// task/memory growth from one-shot thread keys (e.g. Slack non-thread messages). -const CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(300); // 5 min - #[allow(clippy::too_many_arguments)] async fn consumer_loop( thread_key: String, thread_channel: ChannelRef, mut rx: tokio::sync::mpsc::Receiver, - router: Arc, + target: Arc, adapter: Arc, max_batch: usize, max_tokens: usize, + idle_timeout: Duration, ) { // `pending` holds a message that exceeded the token cap for the current batch; // it becomes the first message of the next batch, preserving FIFO. @@ -395,13 +482,13 @@ async fn consumer_loop( loop { // I1: block until at least one message arrives (zero latency for first message). 
- // Idle timeout: if no message arrives within CONSUMER_IDLE_TIMEOUT the - // consumer exits, freeing the task and mpsc. The next `submit` for this - // thread_key will observe `SendError`, evict the stale entry, and lazily - // spawn a fresh consumer (§2.5 generation check prevents mis-eviction). + // Idle timeout: if no message arrives within `idle_timeout` the consumer + // exits, freeing the task and mpsc. The next `submit` for this thread_key + // will observe `SendError`, evict the stale entry, and lazily spawn a + // fresh consumer (§2.5 generation check prevents mis-eviction). let first = match pending.take() { Some(msg) => msg, - None => match tokio::time::timeout(CONSUMER_IDLE_TIMEOUT, rx.recv()).await { + None => match tokio::time::timeout(idle_timeout, rx.recv()).await { Ok(Some(msg)) => msg, Ok(None) => { // All senders dropped → shutdown() or cancel_buffered_thread(). @@ -443,7 +530,7 @@ async fn consumer_loop( dispatch_batch( &thread_key, &thread_channel, - &router, + &target, &adapter, batch, bot_present, @@ -459,7 +546,7 @@ async fn consumer_loop( async fn dispatch_batch( thread_key: &str, thread_channel: &ChannelRef, - router: &Arc, + target: &Arc, adapter: &Arc, batch: Vec, other_bot_present: bool, @@ -470,7 +557,7 @@ async fn dispatch_batch( // Apply 👀 reaction to every message in the batch before dispatch (§6.7). // Parallelized so first-token latency isn't paid for N serial reaction RPCs. - let queued_emoji = &router.reactions_config().emojis.queued; + let queued_emoji = &target.reactions_config().emojis.queued; futures_util::future::join_all( batch .iter() @@ -500,7 +587,7 @@ async fn dispatch_batch( let packed_block_count = content_blocks.len(); // Ensure session exists. - if let Err(e) = router.pool().get_or_create(&session_key).await { + if let Err(e) = target.ensure_session(&session_key).await { let user_msg = format_user_error(&e.to_string()); let _ = adapter .send_message(thread_channel, &format!("⚠️ {user_msg}")) @@ -509,7 +596,7 @@ async fn dispatch_batch( return; } - let reactions_config = router.reactions_config().clone(); + let reactions_config = target.reactions_config().clone(); let reactions = Arc::new(StatusReactionController::new( reactions_config.enabled, adapter.clone(), @@ -519,7 +606,7 @@ async fn dispatch_batch( )); // 👀 already applied above; skip set_queued() to avoid double-reaction. - let result = router + let result = target .stream_prompt_blocks( adapter, &session_key, @@ -1029,4 +1116,266 @@ mod tests { tokio::time::sleep(Duration::from_millis(10)).await; assert!(abort.is_finished()); } + + // ----------------------------------------------------------------------- + // consumer_loop / dispatch_batch integration tests (NIT 2) + // + // These drive `consumer_loop` directly with a pre-populated mpsc, using + // `MockDispatchTarget` to record the calls that would otherwise hit a + // real `AdapterRouter` (and through it, ACP CLI subprocesses). This + // gives deterministic coverage of the orchestration paths the existing + // unit tests don't reach: greedy drain, token-cap overflow, idle timeout. + // ----------------------------------------------------------------------- + + /// One recorded `stream_prompt_blocks` invocation. + #[derive(Clone)] + struct RecordedDispatch { + block_count: usize, + other_bot_present: bool, + } + + /// Mock `DispatchTarget` — records calls; never touches a real session pool. + struct MockDispatchTarget { + reactions: ReactionsConfig, + calls: Mutex>, + /// If set, `ensure_session` returns this error once. 
+ ensure_err: Mutex>, + /// If set, `stream_prompt_blocks` returns this error once. + stream_err: Mutex>, + } + + impl MockDispatchTarget { + fn new() -> Self { + Self { + reactions: ReactionsConfig::default(), + calls: Mutex::new(Vec::new()), + ensure_err: Mutex::new(None), + stream_err: Mutex::new(None), + } + } + + fn calls(&self) -> Vec { + self.calls.lock().unwrap().clone() + } + } + + #[async_trait] + impl DispatchTarget for MockDispatchTarget { + fn reactions_config(&self) -> &ReactionsConfig { + &self.reactions + } + + async fn ensure_session(&self, _session_key: &str) -> Result<()> { + if let Some(msg) = self.ensure_err.lock().unwrap().take() { + return Err(anyhow::anyhow!(msg)); + } + Ok(()) + } + + async fn stream_prompt_blocks( + &self, + _adapter: &Arc, + _session_key: &str, + content_blocks: Vec, + _thread_channel: &ChannelRef, + _reactions: Arc, + other_bot_present: bool, + ) -> Result<()> { + self.calls.lock().unwrap().push(RecordedDispatch { + block_count: content_blocks.len(), + other_bot_present, + }); + if let Some(msg) = self.stream_err.lock().unwrap().take() { + return Err(anyhow::anyhow!(msg)); + } + Ok(()) + } + } + + /// Mock `ChatAdapter` — every method is a no-op success. The dispatch loop + /// invokes `add_reaction` (queued 👀), `platform`, and on the error path + /// `send_message`; nothing else needs real behavior here. + struct MockChatAdapter; + + #[async_trait] + impl ChatAdapter for MockChatAdapter { + fn platform(&self) -> &'static str { "mock" } + fn message_limit(&self) -> usize { 2000 } + + async fn send_message(&self, channel: &ChannelRef, _content: &str) -> Result { + Ok(MessageRef { channel: channel.clone(), message_id: "mock-msg".into() }) + } + + async fn create_thread( + &self, + channel: &ChannelRef, + _trigger_msg: &MessageRef, + _title: &str, + ) -> Result { + Ok(channel.clone()) + } + + async fn add_reaction(&self, _msg: &MessageRef, _emoji: &str) -> Result<()> { Ok(()) } + async fn remove_reaction(&self, _msg: &MessageRef, _emoji: &str) -> Result<()> { Ok(()) } + fn use_streaming(&self, _other_bot_present: bool) -> bool { false } + } + + fn make_channel(thread: &str) -> ChannelRef { + ChannelRef { + platform: "mock".into(), + channel_id: thread.into(), + thread_id: Some(thread.into()), + parent_id: None, + origin_event_id: None, + } + } + + fn make_msg(prompt: &str, tokens: usize) -> BufferedMessage { + BufferedMessage { + sender_json: r#"{"schema":"openab.sender.v1","sender_id":"u","sender_name":"u"}"#.into(), + sender_name: "u".into(), + prompt: prompt.into(), + extra_blocks: vec![], + trigger_msg: MessageRef { + channel: make_channel("T"), + message_id: format!("m-{prompt}"), + }, + arrived_at: Instant::now(), + estimated_tokens: tokens, + other_bot_present: false, + } + } + + /// Pre-load `msgs` into a fresh mpsc, drop the sender, and run + /// `consumer_loop` to completion. Returns the recorded dispatches. 
+ async fn run_consumer_with_messages( + msgs: Vec, + max_batch: usize, + max_tokens: usize, + ) -> Vec { + let mock = Arc::new(MockDispatchTarget::new()); + let target: Arc = mock.clone(); + let adapter: Arc = Arc::new(MockChatAdapter); + let (tx, rx) = tokio::sync::mpsc::channel::(msgs.len().max(1)); + for m in msgs { + tx.send(m).await.unwrap(); + } + drop(tx); + + consumer_loop( + "mock:T".into(), + make_channel("T"), + rx, + target, + adapter, + max_batch, + max_tokens, + Duration::from_secs(60), + ) + .await; + + mock.calls() + } + + #[tokio::test] + async fn consumer_dispatches_single_message_as_one_batch() { + let calls = run_consumer_with_messages(vec![make_msg("hi", 10)], 10, 24_000).await; + assert_eq!(calls.len(), 1); + // pack_arrival_event with no extra_blocks → 1 Text block per message. + assert_eq!(calls[0].block_count, 1); + assert!(!calls[0].other_bot_present); + } + + #[tokio::test] + async fn consumer_greedy_drain_combines_queued_messages_into_one_batch() { + // 3 messages already in the queue when the consumer wakes → greedy + // drain pulls all 3, packs them into one batch, dispatches once. + let calls = run_consumer_with_messages( + vec![make_msg("a", 50), make_msg("b", 50), make_msg("c", 50)], + 10, + 24_000, + ) + .await; + assert_eq!(calls.len(), 1, "expected a single batched dispatch"); + assert_eq!(calls[0].block_count, 3, "one Text block per arrival event"); + } + + #[tokio::test] + async fn consumer_token_cap_splits_batch_preserving_fifo() { + // max_tokens=100, two 80-token messages → cumulative 160 > 100, so + // msg2 becomes `pending` and is dispatched in the next batch. + let calls = + run_consumer_with_messages(vec![make_msg("a", 80), make_msg("b", 80)], 10, 100).await; + assert_eq!(calls.len(), 2, "token cap should split into two batches"); + assert_eq!(calls[0].block_count, 1); + assert_eq!(calls[1].block_count, 1); + } + + #[tokio::test] + async fn consumer_exits_after_idle_timeout_with_no_messages() { + // No messages ever arrive; consumer should exit once `idle_timeout` + // elapses. Keep `tx` alive so the exit path is the timeout, not the + // "all senders dropped" branch. + let mock = Arc::new(MockDispatchTarget::new()); + let target: Arc = mock.clone(); + let adapter: Arc = Arc::new(MockChatAdapter); + let (tx, rx) = tokio::sync::mpsc::channel::(1); + let consumer = tokio::spawn(consumer_loop( + "mock:T".into(), + make_channel("T"), + rx, + target, + adapter, + 10, + 24_000, + Duration::from_millis(50), + )); + // Wait enough for the timeout branch + a tick for the task to finish. + tokio::time::sleep(Duration::from_millis(150)).await; + assert!(consumer.is_finished(), "consumer should exit after idle timeout"); + // No dispatches should have been recorded. + assert!(mock.calls().is_empty()); + drop(tx); + } + + #[tokio::test] + async fn submit_evicts_dead_handle_and_retries_with_fresh_consumer() { + // §2.5: if `tx.send()` returns `SendError` (consumer's rx dropped + // mid-flight), `submit` evicts the stale entry under lock and spawns + // a fresh consumer. Manufacture this state by inserting a handle + // whose consumer is still parked but whose rx has been dropped. 
+ let mock = Arc::new(MockDispatchTarget::new()); + let target: Arc = mock.clone(); + let d = Dispatcher::new(target, 10, 24_000, BatchGrouping::Thread); + let adapter: Arc = Arc::new(MockChatAdapter); + + let key = "mock:T".to_string(); + let parked = { + let (tx, rx) = tokio::sync::mpsc::channel::(10); + drop(rx); // closes the channel → next tx.send() yields SendError + let consumer = tokio::spawn(std::future::pending::<()>()); + let abort = consumer.abort_handle(); + let handle = ThreadHandle { + tx, + consumer, + generation: 999, + channel_id: "T".into(), + adapter_kind: "mock".into(), + }; + d.per_thread.lock().unwrap().insert(key.clone(), handle); + abort + }; + + d.submit(key, make_channel("T"), adapter, make_msg("hello", 10)) + .await + .expect("retry should spawn a fresh consumer"); + // Give the freshly spawned consumer time to drain + dispatch. + tokio::time::sleep(Duration::from_millis(50)).await; + + let calls = mock.calls(); + assert_eq!(calls.len(), 1, "fresh consumer should have dispatched the retry"); + assert_eq!(calls[0].block_count, 1); + + parked.abort(); + } } From 1dc0aae1488e8ae0a1209fa8f04f5a9cae6533fd Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 03:18:21 +0000 Subject: [PATCH 18/32] fix(adapter): make SenderContext.timestamp truly additive Wraps the field in Option with skip_serializing_if so consumers that pre-date the addition see no new key in the serialized JSON. All four producers (slack, discord, gateway, cron) wrap their existing values in Some(...). Schema string stays openab.sender.v1. --- src/adapter.rs | 7 ++++--- src/cron.rs | 2 +- src/discord.rs | 2 +- src/gateway.rs | 4 ++-- src/slack.rs | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/adapter.rs b/src/adapter.rs index 51cf01cd..f7cc8431 100644 --- a/src/adapter.rs +++ b/src/adapter.rs @@ -87,10 +87,11 @@ pub struct SenderContext { #[serde(skip_serializing_if = "Option::is_none")] pub thread_id: Option, pub is_bot: bool, - /// Platform message creation time (ISO 8601 UTC). + /// Platform message creation time (ISO 8601 UTC), if available. /// Discord/Slack: platform timestamp. Gateway: broker receive time (best-effort). - /// Additive field — schema stays openab.sender.v1. - pub timestamp: String, + /// Additive optional field — schema stays openab.sender.v1. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, } // --- ChatAdapter trait --- diff --git a/src/cron.rs b/src/cron.rs index ffeda136..6c9db851 100644 --- a/src/cron.rs +++ b/src/cron.rs @@ -350,7 +350,7 @@ async fn fire_cronjob( channel_id: reply_channel.parent_id.as_deref().unwrap_or(&reply_channel.channel_id).to_string(), thread_id: reply_channel.thread_id.clone().or(Some(reply_channel.channel_id.clone())), is_bot: true, - timestamp: Utc::now().to_rfc3339(), + timestamp: Some(Utc::now().to_rfc3339()), }; let sender_json = match serde_json::to_string(&sender) { Ok(j) => j, diff --git a/src/discord.rs b/src/discord.rs index 6bc224bd..bb0fd392 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -1181,7 +1181,7 @@ fn build_sender_context( channel_id: thread_parent_id.unwrap_or(msg_channel_id).to_string(), thread_id: thread_parent_id.map(|_| msg_channel_id.to_string()), is_bot, - timestamp: timestamp.to_string(), + timestamp: Some(timestamp.to_string()), } } diff --git a/src/gateway.rs b/src/gateway.rs index 28799f1b..99478d01 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -489,11 +489,11 @@ pub async fn run_gateway_adapter( thread_id: event.channel.thread_id.clone(), is_bot: event.sender.is_bot, // Gateway: use event timestamp if available, else broker receive time - timestamp: if event.timestamp.is_empty() { + timestamp: Some(if event.timestamp.is_empty() { crate::timestamp::now_iso8601() } else { event.timestamp.clone() - }, + }), }; let sender_json = serde_json::to_string(&sender_ctx) .unwrap_or_default(); diff --git a/src/slack.rs b/src/slack.rs index 015bf978..5aa5cca2 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -1026,7 +1026,7 @@ async fn handle_message( channel_id: channel_id.clone(), thread_id: thread_ts.clone(), is_bot: is_bot_msg, - timestamp: crate::timestamp::slack_ts_to_iso8601(&ts), + timestamp: Some(crate::timestamp::slack_ts_to_iso8601(&ts)), }; let trigger_msg = MessageRef { From 138f6483c9c5682cb485cc8d0df930fab027302e Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 03:19:34 +0000 Subject: [PATCH 19/32] docs(dispatch): note re-acquire-after-await safety in submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calls out why re-acquiring per_thread after tx.send().await cannot deadlock — the first lock guard is dropped before the await point. --- src/dispatch.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dispatch.rs b/src/dispatch.rs index 0aa824aa..a7fda259 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -322,6 +322,11 @@ impl Dispatcher { if let Err(e) = tx.send(msg).await { // Consumer has exited between our check and the send — race-safe // eviction under lock (§2.5), then transparent retry once. + // + // Safe to re-acquire `per_thread` here: the first lock guard above + // was dropped before `tx.send().await`, so this acquisition cannot + // deadlock against the await point. The same property holds for the + // retry acquisition below. 
{ let mut map = self.per_thread.lock().unwrap(); Self::try_evict_locked(&mut map, &thread_key, my_generation); From 072010cd7f256420d52aa9a7908e7ad0dce1fbc6 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 03:39:15 +0000 Subject: [PATCH 20/32] fix(adapter): use sender_context as standalone delimiter, split prompt into own block pack_arrival_event now emits per arrival: [Text "{json}"] (delimiter) [Text transcript blocks from extra_blocks] [Text prompt] (omitted if empty) [non-Text blocks (e.g. Image)] The sender_context block stands alone as a structural delimiter so agents can locate arrival boundaries by scanning for `` openers in batched dispatch. Within each arrival, transcript text precedes the typed prompt to match pre-batching adapter UX (voice content first), and images trail the prompt as before. Tests updated to reflect the new per-arrival block count (2 minimum: delimiter + prompt; +1 per transcript; +N for image attachments). --- src/adapter.rs | 36 ++++++++---- src/dispatch.rs | 148 +++++++++++++++++++++++++++++++----------------- 2 files changed, 120 insertions(+), 64 deletions(-) diff --git a/src/adapter.rs b/src/adapter.rs index f7cc8431..0c2753c9 100644 --- a/src/adapter.rs +++ b/src/adapter.rs @@ -170,24 +170,38 @@ impl AdapterRouter { &self.reactions_config } - /// Pack one arrival event into ContentBlocks using the uniform per-arrival template: - /// Text { "\n{json}\n\n\n{prompt}" } - /// [extra_blocks in arrival order] + /// Pack one arrival event into ContentBlocks. Per-arrival layout: + /// Text { "\n{json}\n" } <- delimiter + /// [Text blocks from extra_blocks (e.g. STT transcripts)] + /// Text { "{prompt}" } <- omitted if empty + /// [non-Text blocks from extra_blocks (e.g. Image)] /// - /// This is the single packing code path for both per-message and batched dispatch - /// (ADR §3.5). For a batch of N messages, call this N times and concatenate. + /// The sender_context block stands alone so it can serve as a structural + /// delimiter between arrivals in batched dispatch — agents can scan for + /// `` openers to find arrival boundaries. Within an arrival, + /// transcript text precedes the typed prompt to match pre-batching adapter + /// behavior (voice content first), and images trail the prompt as before. + /// This is the single packing code path for both per-message and batched + /// dispatch (ADR §3.5). For a batch of N messages, call this N times and + /// concatenate. pub fn pack_arrival_event( sender_json: &str, prompt: &str, extra_blocks: Vec, ) -> Vec { - let header = format!( - "\n{}\n\n\n{}", - sender_json, prompt - ); - let mut blocks = Vec::with_capacity(1 + extra_blocks.len()); + let header = format!("\n{}\n", sender_json); + let (texts, others): (Vec<_>, Vec<_>) = extra_blocks + .into_iter() + .partition(|b| matches!(b, ContentBlock::Text { .. 
})); + let mut blocks = Vec::with_capacity(2 + texts.len() + others.len()); blocks.push(ContentBlock::Text { text: header }); - blocks.extend(extra_blocks); + blocks.extend(texts); + if !prompt.is_empty() { + blocks.push(ContentBlock::Text { + text: prompt.to_string(), + }); + } + blocks.extend(others); blocks } diff --git a/src/dispatch.rs b/src/dispatch.rs index a7fda259..1c74b529 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -720,12 +720,20 @@ mod tests { "hello", vec![], ); - assert_eq!(blocks.len(), 1); + // sender_context delimiter + prompt = 2 blocks + assert_eq!(blocks.len(), 2); if let ContentBlock::Text { text } = &blocks[0] { assert!(text.contains("")); - assert!(text.contains("hello")); + assert!(text.contains("")); + // Header is delimiter only — prompt lives in its own block. + assert!(!text.contains("hello")); } else { - panic!("expected Text block"); + panic!("expected Text delimiter block"); + } + if let ContentBlock::Text { text } = &blocks[1] { + assert_eq!(text, "hello"); + } else { + panic!("expected Text prompt block"); } } @@ -736,25 +744,34 @@ mod tests { ContentBlock::Image { media_type: "image/png".into(), data: "abc".into() }, ]; let blocks = AdapterRouter::pack_arrival_event("{}", "prompt", extra); - // header + 2 extra = 3 blocks - assert_eq!(blocks.len(), 3); - // extra blocks follow the header in arrival order + // delimiter + transcript + prompt + image = 4 blocks + assert_eq!(blocks.len(), 4); + assert!(matches!(&blocks[0], ContentBlock::Text { text } if text.contains(""))); assert!(matches!(&blocks[1], ContentBlock::Text { text } if text.contains("Voice transcript"))); - assert!(matches!(&blocks[2], ContentBlock::Image { .. })); + assert!(matches!(&blocks[2], ContentBlock::Text { text } if text == "prompt")); + assert!(matches!(&blocks[3], ContentBlock::Image { .. })); } #[test] fn pack_arrival_event_batch_n2() { - // Two arrival events concatenated → 2 header blocks + // Two arrival events concatenated → 2 (header + prompt) pairs = 4 blocks. 
let mut all: Vec = Vec::new(); all.extend(AdapterRouter::pack_arrival_event(r#"{"ts":"T1"}"#, "msg1", vec![])); all.extend(AdapterRouter::pack_arrival_event(r#"{"ts":"T2"}"#, "msg2", vec![])); - assert_eq!(all.len(), 2); + assert_eq!(all.len(), 4); if let ContentBlock::Text { text } = &all[0] { - assert!(text.contains("msg1")); + assert!(text.contains(r#""ts":"T1""#)); + assert!(!text.contains("msg1")); } if let ContentBlock::Text { text } = &all[1] { - assert!(text.contains("msg2")); + assert_eq!(text, "msg1"); + } + if let ContentBlock::Text { text } = &all[2] { + assert!(text.contains(r#""ts":"T2""#)); + assert!(!text.contains("msg2")); + } + if let ContentBlock::Text { text } = &all[3] { + assert_eq!(text, "msg2"); } } @@ -779,30 +796,32 @@ mod tests { data: "imgB".into(), }], )); - // header(M1) + header(M2) + image(M2) = 3 blocks - assert_eq!(all.len(), 3); - // M1's header carries text only + // header(M1) + prompt(M1) + header(M2) + image(M2) = 4 blocks + // (M2 has empty prompt, so its prompt block is omitted) + assert_eq!(all.len(), 4); if let ContentBlock::Text { text } = &all[0] { assert!(text.contains(r#""sender_id":"A""#)); assert!(text.contains(r#""ts":"T1""#)); - assert!(text.contains("see this image")); } else { - panic!("expected Text header for M1"); + panic!("expected Text delimiter for M1"); } - // M2's header carries empty prompt (line after is blank) if let ContentBlock::Text { text } = &all[1] { + assert_eq!(text, "see this image"); + } else { + panic!("expected Text prompt for M1"); + } + if let ContentBlock::Text { text } = &all[2] { assert!(text.contains(r#""ts":"T2""#)); - assert!(text.ends_with("\n\n"), "M2 prompt must be empty: {text:?}"); } else { - panic!("expected Text header for M2"); + panic!("expected Text delimiter for M2"); } - // M2's image follows immediately after its header (structural attribution) - assert!(matches!(&all[2], ContentBlock::Image { .. })); + // M2's image follows immediately after its delimiter (no prompt block). + assert!(matches!(&all[3], ContentBlock::Image { .. })); } // ADR §3.6 Scenario C — fragmented multi-author batch. // Repeated sender_id is preserved across non-adjacent messages; bob's interjection - // is kept as-is (no silent drop, no reorder). + // is kept as-is (no silent drop, no temporal reorder). #[test] fn pack_arrival_event_scenario_c_multi_author_interleaved() { let mut all: Vec = Vec::new(); @@ -824,31 +843,44 @@ mod tests { data: "imgC".into(), }], )); - // 3 headers + 1 image = 4 blocks - assert_eq!(all.len(), 4); - // Order is preserved (no reorder). 
+ // M1: header + prompt = 2 blocks + // M2: header + prompt = 2 blocks + // M3: header + image = 2 blocks (empty prompt → no prompt block) + // total = 6 + assert_eq!(all.len(), 6); let h1 = match &all[0] { ContentBlock::Text { text } => text, - _ => panic!("expected Text"), + _ => panic!("expected Text delimiter for M1"), + }; + let p1 = match &all[1] { + ContentBlock::Text { text } => text, + _ => panic!("expected Text prompt for M1"), + }; + let h2 = match &all[2] { + ContentBlock::Text { text } => text, + _ => panic!("expected Text delimiter for M2"), }; - let h2 = match &all[1] { + let p2 = match &all[3] { ContentBlock::Text { text } => text, - _ => panic!("expected Text"), + _ => panic!("expected Text prompt for M2"), }; - let h3 = match &all[2] { + let h3 = match &all[4] { ContentBlock::Text { text } => text, - _ => panic!("expected Text"), + _ => panic!("expected Text delimiter for M3"), }; - assert!(h1.contains(r#""sender_id":"A""#) && h1.contains("see this image")); - assert!(h2.contains(r#""sender_id":"B""#) && h2.contains("what?")); - assert!(h3.contains(r#""sender_id":"A""#)); + assert!(h1.contains(r#""sender_id":"A""#) && h1.contains(r#""ts":"T1""#)); + assert_eq!(p1, "see this image"); + assert!(h2.contains(r#""sender_id":"B""#) && h2.contains(r#""ts":"T2""#)); + assert_eq!(p2, "what?"); + assert!(h3.contains(r#""sender_id":"A""#) && h3.contains(r#""ts":"T3""#)); // M3's image attached to M3 only. - assert!(matches!(&all[3], ContentBlock::Image { .. })); + assert!(matches!(&all[5], ContentBlock::Image { .. })); } // ADR §3.6 Scenario D — voice-only message in a batch. - // M2 has empty prompt + transcript text block. Per ADR, transcript moves AFTER - // (vs. v0.8.2-beta.1's prepended position). + // Within each arrival, transcript Text blocks precede the prompt block so the + // agent sees voice content before any typed text. The sender_context delimiter + // still opens each arrival. #[test] fn pack_arrival_event_scenario_d_voice_only() { let mut all: Vec = Vec::new(); @@ -872,27 +904,34 @@ mod tests { "what?", vec![], )); - // header(M1) + image(M1) + header(M2) + transcript(M2) + header(M3) = 5 - assert_eq!(all.len(), 5); + // M1: header + prompt + image = 3 + // M2: header + transcript = 2 (empty prompt → no prompt block) + // M3: header + prompt = 2 + // total = 7 + assert_eq!(all.len(), 7); if let ContentBlock::Text { text } = &all[0] { assert!(text.contains(r#""ts":"T1""#)); - assert!(text.contains("look at this")); + assert!(!text.contains("look at this")); } - assert!(matches!(&all[1], ContentBlock::Image { .. })); - // M2 header has empty prompt; transcript follows AFTER the header (not before). - if let ContentBlock::Text { text } = &all[2] { - assert!(text.contains(r#""ts":"T2""#)); - assert!(text.ends_with("\n\n")); + if let ContentBlock::Text { text } = &all[1] { + assert_eq!(text, "look at this"); } + assert!(matches!(&all[2], ContentBlock::Image { .. })); if let ContentBlock::Text { text } = &all[3] { + assert!(text.contains(r#""ts":"T2""#)); + } + // Transcript precedes prompt (and prompt is omitted here because empty). 
+ if let ContentBlock::Text { text } = &all[4] { assert!(text.contains("Voice message transcript")); assert!(text.contains("sync about the deploy")); } else { - panic!("expected transcript Text block as M2 attachment"); + panic!("expected transcript Text block after M2 delimiter"); } - if let ContentBlock::Text { text } = &all[4] { + if let ContentBlock::Text { text } = &all[5] { assert!(text.contains(r#""sender_id":"B""#)); - assert!(text.contains("what?")); + } + if let ContentBlock::Text { text } = &all[6] { + assert_eq!(text, "what?"); } } @@ -1286,8 +1325,8 @@ mod tests { async fn consumer_dispatches_single_message_as_one_batch() { let calls = run_consumer_with_messages(vec![make_msg("hi", 10)], 10, 24_000).await; assert_eq!(calls.len(), 1); - // pack_arrival_event with no extra_blocks → 1 Text block per message. - assert_eq!(calls[0].block_count, 1); + // pack_arrival_event with no extra_blocks → delimiter + prompt = 2 blocks. + assert_eq!(calls[0].block_count, 2); assert!(!calls[0].other_bot_present); } @@ -1302,7 +1341,8 @@ mod tests { ) .await; assert_eq!(calls.len(), 1, "expected a single batched dispatch"); - assert_eq!(calls[0].block_count, 3, "one Text block per arrival event"); + // 3 arrivals × (delimiter + prompt) = 6 blocks. + assert_eq!(calls[0].block_count, 6); } #[tokio::test] @@ -1312,8 +1352,9 @@ mod tests { let calls = run_consumer_with_messages(vec![make_msg("a", 80), make_msg("b", 80)], 10, 100).await; assert_eq!(calls.len(), 2, "token cap should split into two batches"); - assert_eq!(calls[0].block_count, 1); - assert_eq!(calls[1].block_count, 1); + // Each batch holds one arrival → delimiter + prompt = 2 blocks. + assert_eq!(calls[0].block_count, 2); + assert_eq!(calls[1].block_count, 2); } #[tokio::test] @@ -1379,7 +1420,8 @@ mod tests { let calls = mock.calls(); assert_eq!(calls.len(), 1, "fresh consumer should have dispatched the retry"); - assert_eq!(calls[0].block_count, 1); + // pack_arrival_event with no extra_blocks → delimiter + prompt = 2 blocks. + assert_eq!(calls[0].block_count, 2); parked.abort(); } From 26c7bf8c2a4eb369a499246c5750cb4fe013dbaf Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 10:31:57 +0000 Subject: [PATCH 21/32] fix(gateway): import AdapterRouter so handle_config_command compiles handle_config_command's signature uses &AdapterRouter but only crate::adapter::{ChannelRef, ChatAdapter, MessageRef, SenderContext} were imported, so cargo check failed with E0425. Add AdapterRouter to the use list (the other reference at line 482 already uses the fully qualified path). --- src/gateway.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gateway.rs b/src/gateway.rs index b5498ca7..21ccf155 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -1,4 +1,4 @@ -use crate::adapter::{ChannelRef, ChatAdapter, MessageRef, SenderContext}; +use crate::adapter::{AdapterRouter, ChannelRef, ChatAdapter, MessageRef, SenderContext}; use anyhow::Result; use async_trait::async_trait; use futures_util::{SinkExt, StreamExt}; From 0f339e23315b1efd6f9beb563b183c375f0422c8 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 13:42:51 +0000 Subject: [PATCH 22/32] fix(timestamp): parse Slack ts as f64 to preserve decimal semantics Previously slack_ts_to_iso8601 split on '.' and parsed the fractional substring as an integer, treating ".12" as 12 ms instead of 120 ms. 
Parsing the entire string as f64 carries decimal semantics correctly without any string-padding logic. Co-Authored-By: Claude Opus 4.7 --- src/timestamp.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/timestamp.rs b/src/timestamp.rs index 485a7864..e6c8d49f 100644 --- a/src/timestamp.rs +++ b/src/timestamp.rs @@ -34,11 +34,13 @@ fn unix_to_iso8601(secs: u64, ms: u64) -> String { /// Convert a Slack `ts` string (".") to ISO 8601 UTC. /// Best-effort; falls back to epoch on parse failure. +/// +/// Parses as `f64` so the fractional part carries decimal semantics directly — +/// ".12" maps to 120 ms, not 12 ms — without any string-padding gymnastics. pub fn slack_ts_to_iso8601(ts: &str) -> String { - let mut parts = ts.splitn(2, '.'); - let secs = parts.next().unwrap_or("0").parse::().unwrap_or(0); - let frac = parts.next().unwrap_or("000"); - let ms: u64 = frac.chars().take(3).collect::().parse().unwrap_or(0); + let total = ts.parse::().unwrap_or(0.0); + let secs = total.trunc() as u64; + let ms = (total.fract() * 1000.0).round() as u64; unix_to_iso8601(secs, ms) } @@ -70,6 +72,18 @@ mod tests { assert_eq!(slack_ts_to_iso8601("1714204397"), "2024-04-27T07:53:17.000Z"); } + #[test] + fn slack_ts_two_digit_fraction_is_120ms_not_12ms() { + // ".12" carries decimal semantics: 0.12 s = 120 ms. + assert_eq!(slack_ts_to_iso8601("1714204397.12"), "2024-04-27T07:53:17.120Z"); + } + + #[test] + fn slack_ts_one_digit_fraction_is_100ms_not_1ms() { + // ".1" carries decimal semantics: 0.1 s = 100 ms. + assert_eq!(slack_ts_to_iso8601("1714204397.1"), "2024-04-27T07:53:17.100Z"); + } + #[test] fn slack_ts_unparseable_falls_back_to_epoch() { assert_eq!(slack_ts_to_iso8601("not-a-ts"), "1970-01-01T00:00:00.000Z"); From 956461b6003375c29abbde7d0aea1fabebd8ec83 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 13:42:57 +0000 Subject: [PATCH 23/32] refactor(discord): drop approximate count from /cancel-all message The buffered-message count is approximate (sweep races with new arrivals) so surfacing an exact number to users was misleading. Show a binary "cleared / nothing" signal instead. The pending_count() API stays for logs and metrics. Co-Authored-By: Claude Opus 4.7 --- src/discord.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/discord.rs b/src/discord.rs index 64f5453b..58ce456a 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -904,11 +904,13 @@ impl Handler { let cancel_result = self.router.pool().cancel_session(&session_key).await; + // Buffer count is approximate (sweep races with new arrivals) so we surface + // a binary "cleared / nothing" signal rather than a misleading exact number. let msg = match (cancel_result, dropped) { (Ok(()), 0) => "🛑 Cancel signal sent.".to_string(), - (Ok(()), n) => format!("🛑 Cancel signal sent. Dropped {n} buffered message(s)."), + (Ok(()), _) => "🛑 Cancel signal sent. Buffered messages cleared.".to_string(), (Err(_), 0) => "⚠️ Nothing to cancel — no active session and no buffered messages.".to_string(), - (Err(_), n) => format!("🛑 Dropped {n} buffered message(s). No active session to cancel."), + (Err(_), _) => "🛑 Buffered messages cleared. 
No active session to cancel.".to_string(), }; let response = CreateInteractionResponse::Message( From 3f19d48554663317e7057d78e12bae4105ad6d3f Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 13:43:24 +0000 Subject: [PATCH 24/32] docs(dispatch): annotate per_thread mutex lock sites with SAFETY comments Make the no-.await-while-locked invariant explicit at each lock acquisition site so future edits can't silently introduce an .await without tripping the comment. The struct-level note at line 183 stays as the higher-level explanation. Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/dispatch.rs b/src/dispatch.rs index 1c74b529..6fba79b0 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -284,6 +284,7 @@ impl Dispatcher { let next_g = self.next_generation.fetch_add(1, Ordering::Relaxed); let (tx, my_generation) = { + // SAFETY: no .await while this guard is held — guard drops at end of block. let mut map = self.per_thread.lock().unwrap(); // Proactive stale-entry cleanup: if the consumer has exited (idle @@ -328,6 +329,7 @@ impl Dispatcher { // deadlock against the await point. The same property holds for the // retry acquisition below. { + // SAFETY: no .await while this guard is held. let mut map = self.per_thread.lock().unwrap(); Self::try_evict_locked(&mut map, &thread_key, my_generation); } @@ -337,6 +339,7 @@ impl Dispatcher { // surface the error to the user. let retry_g = self.next_generation.fetch_add(1, Ordering::Relaxed); let (retry_tx, retry_gen) = { + // SAFETY: no .await while this guard is held — guard drops at end of block. let mut map = self.per_thread.lock().unwrap(); let entry = map.entry(thread_key.clone()).or_insert_with(|| { let (tx, rx) = tokio::sync::mpsc::channel(cap); @@ -364,6 +367,7 @@ impl Dispatcher { if let Err(e2) = retry_tx.send(failed_msg).await { // Retry also failed — truly unexpected. Surface error. { + // SAFETY: no .await while this guard is held. let mut map = self.per_thread.lock().unwrap(); Self::try_evict_locked(&mut map, &thread_key, retry_gen); } @@ -400,6 +404,7 @@ impl Dispatcher { pub fn cancel_buffered_thread(&self, platform: &str, thread_id: &str) -> usize { let prefix = format!("{platform}:{thread_id}"); let lane_prefix = format!("{prefix}:"); + // SAFETY: no .await while this guard is held — function is sync. let mut map = self.per_thread.lock().unwrap(); let keys: Vec = map .keys() @@ -440,6 +445,7 @@ impl Dispatcher { /// to prevent unbounded map growth from one-shot thread keys that never /// receive a second `submit()`. Returns the number of entries swept. pub fn sweep_stale(&self) -> usize { + // SAFETY: no .await while this guard is held — function is sync. let mut map = self.per_thread.lock().unwrap(); let before = map.len(); map.retain(|_, handle| !handle.consumer.is_finished()); @@ -448,6 +454,7 @@ impl Dispatcher { /// Log buffered-message counts and drop all handles (called on SIGTERM). pub fn shutdown(&self) { + // SAFETY: no .await while this guard is held — function is sync. 
let mut map = self.per_thread.lock().unwrap(); for (thread_id, handle) in map.iter() { let pending = handle.pending_count(); From 93765a145955aee3d77fd494282756e68782dc5d Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 13:43:42 +0000 Subject: [PATCH 25/32] refactor(dispatch): apply queued reactions sequentially Replace futures_util::future::join_all with a sequential await loop. Batches are typically small (low single digits) so the serialization cost is sub-second and not user-visible, and the dispatch path no longer pulls in join_all just for one call. Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index 6fba79b0..e022bfa7 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -568,14 +568,13 @@ async fn dispatch_batch( let session_key = Dispatcher::session_key(thread_channel); // Apply 👀 reaction to every message in the batch before dispatch (§6.7). - // Parallelized so first-token latency isn't paid for N serial reaction RPCs. + // Sequential — batches are typically small (≤ low single digits) so the + // serialization cost is sub-second and not user-visible; sequential keeps the + // dispatch path free of `futures_util::join_all` and easier to reason about. let queued_emoji = &target.reactions_config().emojis.queued; - futures_util::future::join_all( - batch - .iter() - .map(|msg| adapter.add_reaction(&msg.trigger_msg, queued_emoji)), - ) - .await; + for msg in batch.iter() { + let _ = adapter.add_reaction(&msg.trigger_msg, queued_emoji).await; + } // Collect per-event observability data (before consuming the batch). let tokens_per_event: Vec = batch.iter().map(|m| m.estimated_tokens).collect(); From 963b96c444256fbd06ea77b7e1081c1519c39680 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 13:43:53 +0000 Subject: [PATCH 26/32] refactor(dispatch): per-mode consumer idle timeout (10s for per-message) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-message mode (cap=1) doesn't benefit from holding consumers across message gaps — there is no batch window to preserve — so a 5-minute idle timeout left consumer tasks lingering long after they were useful. Add PER_MESSAGE_CONSUMER_IDLE_TIMEOUT (10s), wire it through main.rs based on each adapter's message_processing_mode, and drop the unused Dispatcher::new wrapper. By Little's Law (steady-state idle count = arrival rate × idle window), this cuts per-message-mode idle dispatcher footprint by 30x for the same arrival rate while keeping batched modes' 5-minute window so between-trigger lanes aren't torn down on every message. Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 42 +++++++++++++++++++----------------------- src/main.rs | 33 ++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index e022bfa7..2c19cc4d 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -169,12 +169,22 @@ impl DispatchTarget for AdapterRouter { // Dispatcher // --------------------------------------------------------------------------- -/// Default idle timeout for per-thread consumer tasks. When no message arrives -/// within this window the consumer exits, allowing `per_thread` map cleanup on -/// the next `submit` (via `SendError` → `try_evict_locked`). 
Prevents unbounded -/// task/memory growth from one-shot thread keys (e.g. Slack non-thread messages). +/// Default idle timeout for per-thread consumer tasks in batched modes (Thread / Lane). +/// When no message arrives within this window the consumer exits, allowing `per_thread` +/// map cleanup on the next `submit` (via `SendError` → `try_evict_locked`). Prevents +/// unbounded task/memory growth from one-shot thread keys (e.g. Slack non-thread messages). +/// +/// Batched modes need a longer window so a lane that's between trigger arrivals isn't +/// torn down and respawned on every message. pub const DEFAULT_CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(300); +/// Idle timeout for per-message mode (cap=1, no batching). Per-message dispatchers +/// don't benefit from holding consumers across message gaps — there is no batch +/// window to preserve — so a much shorter timeout reduces idle resource footprint +/// from one-shot thread keys (Little's Law: steady-state idle count = arrival rate +/// × idle window). +pub const PER_MESSAGE_CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(10); + /// Per-thread message dispatcher for batched mode. /// /// Constructed once in `main.rs` and shared via `Arc`. Platform adapters call @@ -194,23 +204,9 @@ pub struct Dispatcher { } impl Dispatcher { - pub fn new( - target: Arc, - max_buffered_messages: usize, - max_batch_tokens: usize, - grouping: BatchGrouping, - ) -> Self { - Self::with_idle_timeout( - target, - max_buffered_messages, - max_batch_tokens, - grouping, - DEFAULT_CONSUMER_IDLE_TIMEOUT, - ) - } - - /// Like `new`, but with a custom consumer idle timeout. Test-only knob — - /// production code should use `new` (which applies `DEFAULT_CONSUMER_IDLE_TIMEOUT`). + /// Construct a dispatcher with an explicit consumer idle timeout. Per-mode + /// callers in `main.rs` pass `PER_MESSAGE_CONSUMER_IDLE_TIMEOUT` for cap=1 + /// dispatchers and `DEFAULT_CONSUMER_IDLE_TIMEOUT` for batched modes. pub fn with_idle_timeout( target: Arc, max_buffered_messages: usize, @@ -1027,7 +1023,7 @@ mod tests { crate::config::ReactionsConfig::default(), crate::markdown::TableMode::Off, )); - Dispatcher::new(router, 10, 24_000, grouping) + Dispatcher::with_idle_timeout(router, 10, 24_000, grouping, DEFAULT_CONSUMER_IDLE_TIMEOUT) } #[tokio::test] @@ -1398,7 +1394,7 @@ mod tests { // whose consumer is still parked but whose rx has been dropped. let mock = Arc::new(MockDispatchTarget::new()); let target: Arc = mock.clone(); - let d = Dispatcher::new(target, 10, 24_000, BatchGrouping::Thread); + let d = Dispatcher::with_idle_timeout(target, 10, 24_000, BatchGrouping::Thread, DEFAULT_CONSUMER_IDLE_TIMEOUT); let adapter: Arc = Arc::new(MockChatAdapter); let key = "mock:T".to_string(); diff --git a/src/main.rs b/src/main.rs index 6372b680..d6c6797b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -202,19 +202,20 @@ async fn main() -> anyhow::Result<()> { // Dispatcher is the sole serialization path for all modes. Message = cap 1 // (each message dispatches alone, FIFO). Thread / Lane = configured cap; // grouping decides whether senders share a buffer or get their own lane. 
- let (slack_cap, slack_grouping) = match slack_cfg.message_processing_mode { + let (slack_cap, slack_grouping, slack_idle) = match slack_cfg.message_processing_mode { config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread), + (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Thread => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Lane => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), + (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), }; - let slack_dispatcher = Arc::new(dispatch::Dispatcher::new( + let slack_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), slack_cap, slack_cfg.max_batch_tokens, slack_grouping, + slack_idle, )); dispatchers.lock().unwrap().push(slack_dispatcher.clone()); Some(tokio::spawn(async move { @@ -247,19 +248,20 @@ async fn main() -> anyhow::Result<()> { let router = router.clone(); let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); - let (gw_cap, gw_grouping) = match gw_cfg.message_processing_mode { + let (gw_cap, gw_grouping, gw_idle) = match gw_cfg.message_processing_mode { config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread), + (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Thread => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Lane => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), + (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), }; - let gw_dispatcher = Arc::new(dispatch::Dispatcher::new( + let gw_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), gw_cap, gw_cfg.max_batch_tokens, gw_grouping, + gw_idle, )); dispatchers.lock().unwrap().push(gw_dispatcher.clone()); let params = gateway::GatewayParams { @@ -346,19 +348,20 @@ async fn main() -> anyhow::Result<()> { "starting discord adapter" ); - let (discord_cap, discord_grouping) = match discord_cfg.message_processing_mode { + let (discord_cap, discord_grouping, discord_idle) = match discord_cfg.message_processing_mode { config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread), + (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Thread => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread), + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), config::MessageProcessingMode::Lane => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane), + (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), }; - let discord_dispatcher = Arc::new(dispatch::Dispatcher::new( + let discord_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), discord_cap, discord_cfg.max_batch_tokens, discord_grouping, + discord_idle, )); dispatchers.lock().unwrap().push(discord_dispatcher.clone()); From 
aea678253ef0049a63b583ed11dbaebf8c33c1b1 Mon Sep 17 00:00:00 2001 From: Brett Chien <1193046+brettchien@users.noreply.github.com> Date: Tue, 5 May 2026 17:18:22 +0000 Subject: [PATCH 27/32] refactor(dispatch): extract dispatch_params, name token-estimate consts, fix ADR path - main.rs: collapse 3x repeated (cap, grouping, idle) match blocks into dispatch::dispatch_params(mode, max_buffered). - dispatch.rs: replace magic 4 / 512 in estimate_tokens with named CHARS_PER_TOKEN_ESTIMATE / TOKENS_PER_IMAGE_ESTIMATE constants. - dispatch.rs: fix top-level ADR reference to point at the actual docs/adr/turn-boundary-batching.md path landing in #598. Addresses chaodu-agent NITs #1, #2, #5 from PR #686. Co-Authored-By: Claude Opus 4.7 --- src/dispatch.rs | 33 ++++++++++++++++++++++++++++----- src/main.rs | 36 ++++++++++++------------------------ 2 files changed, 40 insertions(+), 29 deletions(-) diff --git a/src/dispatch.rs b/src/dispatch.rs index 2c19cc4d..1667521c 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -1,6 +1,6 @@ //! Turn-boundary message batching dispatcher. //! -//! See ADR: turn-boundary-batching-adr.md for full design rationale. +//! See ADR: docs/adr/turn-boundary-batching.md for full design rationale. //! //! # Invariants //! - I1: First message after idle has zero added latency. @@ -185,6 +185,23 @@ pub const DEFAULT_CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(300); /// × idle window). pub const PER_MESSAGE_CONSUMER_IDLE_TIMEOUT: Duration = Duration::from_secs(10); +/// Resolve `(cap, grouping, idle_timeout)` for a given processing mode. +/// +/// Per-message mode forces cap=1 + Thread grouping + the short per-message idle +/// (one-shot threads shouldn't pin a consumer for 5 min); Thread / Lane modes +/// use the configured `max_buffered` and the default idle window. +pub fn dispatch_params( + mode: &crate::config::MessageProcessingMode, + max_buffered: usize, +) -> (usize, BatchGrouping, Duration) { + use crate::config::MessageProcessingMode; + match mode { + MessageProcessingMode::Message => (1, BatchGrouping::Thread, PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), + MessageProcessingMode::Thread => (max_buffered, BatchGrouping::Thread, DEFAULT_CONSUMER_IDLE_TIMEOUT), + MessageProcessingMode::Lane => (max_buffered, BatchGrouping::Lane, DEFAULT_CONSUMER_IDLE_TIMEOUT), + } +} + /// Per-thread message dispatcher for batched mode. /// /// Constructed once in `main.rs` and shared via `Arc`. Platform adapters call @@ -671,16 +688,22 @@ async fn dispatch_batch( // Token estimation // --------------------------------------------------------------------------- +/// Rough char-to-token ratio for English-ish text. Coarse on purpose — the goal +/// is a guard rail for `max_batch_tokens`, not an exact pre-flight. +const CHARS_PER_TOKEN_ESTIMATE: usize = 4; +/// Conservative per-image token budget. Larger than typical Claude image cost +/// so the cap trips before we hand the model an oversized batch. +const TOKENS_PER_IMAGE_ESTIMATE: usize = 512; + /// Rough token estimate for a buffered message (used for `max_batch_tokens` cap). /// Intentionally coarse — the goal is a guard rail, not an exact pre-flight. pub fn estimate_tokens(prompt: &str, extra_blocks: &[ContentBlock]) -> usize { - // ~4 chars per token for text; fixed 512 per image block (conservative). 
- let text_tokens = prompt.len() / 4 + 1; + let text_tokens = prompt.len() / CHARS_PER_TOKEN_ESTIMATE + 1; let block_tokens: usize = extra_blocks .iter() .map(|b| match b { - ContentBlock::Text { text } => text.len() / 4 + 1, - ContentBlock::Image { .. } => 512, + ContentBlock::Text { text } => text.len() / CHARS_PER_TOKEN_ESTIMATE + 1, + ContentBlock::Image { .. } => TOKENS_PER_IMAGE_ESTIMATE, }) .sum(); text_tokens + block_tokens diff --git a/src/main.rs b/src/main.rs index d6c6797b..e77c3c22 100644 --- a/src/main.rs +++ b/src/main.rs @@ -202,14 +202,10 @@ async fn main() -> anyhow::Result<()> { // Dispatcher is the sole serialization path for all modes. Message = cap 1 // (each message dispatches alone, FIFO). Thread / Lane = configured cap; // grouping decides whether senders share a buffer or get their own lane. - let (slack_cap, slack_grouping, slack_idle) = match slack_cfg.message_processing_mode { - config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Thread => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Lane => - (slack_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - }; + let (slack_cap, slack_grouping, slack_idle) = dispatch::dispatch_params( + &slack_cfg.message_processing_mode, + slack_cfg.max_buffered_messages, + ); let slack_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), slack_cap, @@ -248,14 +244,10 @@ async fn main() -> anyhow::Result<()> { let router = router.clone(); let shutdown_rx = shutdown_rx.clone(); info!(url = %gw_cfg.url, "starting gateway adapter"); - let (gw_cap, gw_grouping, gw_idle) = match gw_cfg.message_processing_mode { - config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Thread => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Lane => - (gw_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - }; + let (gw_cap, gw_grouping, gw_idle) = dispatch::dispatch_params( + &gw_cfg.message_processing_mode, + gw_cfg.max_buffered_messages, + ); let gw_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), gw_cap, @@ -348,14 +340,10 @@ async fn main() -> anyhow::Result<()> { "starting discord adapter" ); - let (discord_cap, discord_grouping, discord_idle) = match discord_cfg.message_processing_mode { - config::MessageProcessingMode::Message => - (1, dispatch::BatchGrouping::Thread, dispatch::PER_MESSAGE_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Thread => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Thread, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - config::MessageProcessingMode::Lane => - (discord_cfg.max_buffered_messages, dispatch::BatchGrouping::Lane, dispatch::DEFAULT_CONSUMER_IDLE_TIMEOUT), - }; + let (discord_cap, discord_grouping, discord_idle) = dispatch::dispatch_params( + &discord_cfg.message_processing_mode, + discord_cfg.max_buffered_messages, + ); let discord_dispatcher = Arc::new(dispatch::Dispatcher::with_idle_timeout( router.clone(), discord_cap, From 1bc3c0c358f00bde995ad1dad7da51a2df008b12 Mon Sep 17 00:00:00 2001 From: chaodu-agent Date: Tue, 5 May 2026 
20:04:44 +0000 Subject: [PATCH 28/32] docs: clarify schema evolution comment + dispatchers triple-Arc rationale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - adapter.rs: note that future breaking changes should bump to v1.1+ - main.rs: explain why Arc>>> is necessary (shared with cleanup task + shutdown; pushes at startup only) Addresses maintainer NITs from PR #686 review. Co-Authored-By: 超渡法師 --- src/adapter.rs | 3 ++- src/main.rs | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/adapter.rs b/src/adapter.rs index 0c2753c9..1bf1536d 100644 --- a/src/adapter.rs +++ b/src/adapter.rs @@ -89,7 +89,8 @@ pub struct SenderContext { pub is_bot: bool, /// Platform message creation time (ISO 8601 UTC), if available. /// Discord/Slack: platform timestamp. Gateway: broker receive time (best-effort). - /// Additive optional field — schema stays openab.sender.v1. + /// Additive optional field — schema version stays openab.sender.v1 (no consumer + /// breakage). If future additions require breaking changes, bump to v1.1+. #[serde(skip_serializing_if = "Option::is_none")] pub timestamp: Option, } diff --git a/src/main.rs b/src/main.rs index e77c3c22..6037c9ae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -146,6 +146,9 @@ async fn main() -> anyhow::Result<()> { // Dispatcher handles tracked here so SIGTERM cleanup can call shutdown() on each (ADR §6.8). // Also shared with the cleanup task for periodic stale-entry sweeping. + // Arc>> because: outer Arc shared with cleanup task + shutdown, + // Mutex guards startup-time pushes, inner Arc shared with each adapter. + // All pushes happen at startup; runtime access is read-only (lock is uncontended). let dispatchers: Arc>>> = Arc::new(Mutex::new(Vec::new())); // Spawn cleanup task From 73f4cc536fa9cff3a573af677e30a41dd1c7eeec Mon Sep 17 00:00:00 2001 From: chaodu-agent Date: Tue, 5 May 2026 20:08:54 +0000 Subject: [PATCH 29/32] docs: add message dispatch modes guide (per-message vs per-thread vs per-lane) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decision guide for operators choosing between the three modes, with config examples and trade-off explanations. Co-Authored-By: 超渡法師 --- docs/message-dispatch-modes.md | 95 ++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 docs/message-dispatch-modes.md diff --git a/docs/message-dispatch-modes.md b/docs/message-dispatch-modes.md new file mode 100644 index 00000000..d22ad088 --- /dev/null +++ b/docs/message-dispatch-modes.md @@ -0,0 +1,95 @@ +# Message Dispatch Modes + +OpenAB supports three message dispatch modes that control how incoming messages are batched before being sent to the AI agent as an ACP turn. + +## Modes + +### `per-message` (default) + +Each message triggers its own ACP turn. This is the v0.8.2 behavior — simple, predictable, no batching. + +**Use when:** +- Single-user threads (most common case) +- Low message volume +- You want the simplest mental model (1 message = 1 agent response) + +**Trade-off:** If a user sends 3 messages quickly while the agent is processing, each becomes a separate turn — the agent sees them independently and responds 3 times. + +### `per-thread` + +All messages in a thread share one buffer. Messages that arrive while the agent is processing are batched into a single ACP turn at the next turn boundary. 
+ +**Use when:** +- High-traffic threads where multiple users address the bot simultaneously +- You want to minimize token cost (one context window serves N messages) +- The agent is expected to address all senders in a single reply + +**Trade-off:** If Alice and Bob both message in the same thread, they share one batch. The agent gets one turn to address both — if it only replies to Alice, Bob's message is effectively "silent-dropped" (delivered but not addressed). + +### `per-lane` + +Each (thread, sender) pair gets its own buffer. Messages from the same sender batch together, but different senders get independent ACP turns. + +**Use when:** +- Multi-user threads where each sender expects a dedicated response +- Multi-agent collaboration (bot-to-bot threads) +- You want batching benefits without silent-drop risk + +**Trade-off:** More ACP turns than `per-thread` (one per active sender per turn boundary), so higher token cost. Turns serialize through the shared session — sender B waits for sender A's turn to complete. + +## Decision Guide + +``` +Is this a single-user bot (1 human per thread)? + → per-message (default, simplest) + +Multiple humans in the same thread? + ├─ Is it OK if the agent addresses everyone in one reply? + │ → per-thread (cheapest) + └─ Each person needs their own response? + → per-lane (safest) + +Multi-agent collaboration (bot-to-bot)? + → per-lane (each bot gets its own turn) +``` + +## Configuration + +### config.toml + +```toml +[discord] +message_processing_mode = "per-lane" # "per-message" | "per-thread" | "per-lane" +max_buffered_messages = 10 # per-thread mpsc capacity (batched modes only) +max_batch_tokens = 24000 # soft token cap per ACP turn (batched modes only) +``` + +### Helm values + +```yaml +agents: + kiro: + discord: + messageProcessingMode: "per-lane" + maxBufferedMessages: 10 + maxBatchTokens: 24000 +``` + +The same fields are available under `slack:` and `gateway:` sections. + +## How Batching Works + +1. **First message after idle** — dispatched immediately (zero added latency). +2. **Subsequent messages while agent is processing** — buffered in an mpsc channel. +3. **Turn boundary** (agent finishes responding) — consumer drains all buffered messages up to `max_buffered_messages` or `max_batch_tokens`, packs them into one ACP turn. +4. **Token cap overflow** — if the next message would exceed `max_batch_tokens`, it becomes the first message of the next batch (FIFO preserved). + +Each message in a batch retains its own `` delimiter so the agent can identify arrival boundaries and respond to each sender appropriately. + +## Defaults + +| Parameter | Default | Notes | +|-----------|---------|-------| +| `message_processing_mode` | `per-message` | Backward compatible, no batching | +| `max_buffered_messages` | 10 | Only applies to `per-thread` / `per-lane` | +| `max_batch_tokens` | 24000 | Rough estimate (~4 chars/token) | From beb5493af722f72be9da660ced537b6c3e5e7e49 Mon Sep 17 00:00:00 2001 From: chaodu-agent Date: Tue, 5 May 2026 20:11:00 +0000 Subject: [PATCH 30/32] docs(dispatch): add ASCII diagrams for all three modes + consumer loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Visual explanation of per-message vs per-thread vs per-lane behavior, plus the internal consumer_loop batching flow. 
Co-Authored-By: 超渡法師 --- docs/message-dispatch-modes.md | 119 +++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 6 deletions(-) diff --git a/docs/message-dispatch-modes.md b/docs/message-dispatch-modes.md index d22ad088..9bb803f3 100644 --- a/docs/message-dispatch-modes.md +++ b/docs/message-dispatch-modes.md @@ -77,14 +77,121 @@ agents: The same fields are available under `slack:` and `gateway:` sections. -## How Batching Works +## How It Works — ASCII Diagrams -1. **First message after idle** — dispatched immediately (zero added latency). -2. **Subsequent messages while agent is processing** — buffered in an mpsc channel. -3. **Turn boundary** (agent finishes responding) — consumer drains all buffered messages up to `max_buffered_messages` or `max_batch_tokens`, packs them into one ACP turn. -4. **Token cap overflow** — if the next message would exceed `max_batch_tokens`, it becomes the first message of the next batch (FIFO preserved). +### per-message (default) -Each message in a batch retains its own `` delimiter so the agent can identify arrival boundaries and respond to each sender appropriately. +``` +Time ──────────────────────────────────────────────────────► + +Alice: "hi" Alice: "also this" Bob: "hey" + │ │ │ + ▼ ▼ ▼ +┌──────────┐ ┌──────────┐ ┌──────────┐ +│ ACP Turn │ │ ACP Turn │ │ ACP Turn │ +│ (1 msg) │ │ (1 msg) │ │ (1 msg) │ +└────┬─────┘ └────┬─────┘ └────┬─────┘ + ▼ ▼ ▼ + Response 1 Response 2 Response 3 + +Each message = its own turn. Simple. 3 messages → 3 responses. +``` + +### per-thread + +``` +Time ──────────────────────────────────────────────────────► + +Alice: "hi" Bob: "hey" Alice: "also this" + │ │ │ + ▼ │ │ +┌──────────┐ │ │ +│ ACP Turn │ (agent busy...) │ +│ (1 msg) │ │ │ +└────┬─────┘ ▼ ▼ + │ ┌────────────────────────┐ + ▼ │ Buffer (shared thread) │ + Response 1 │ → Bob: "hey" │ + │ → Alice: "also this" │ + └───────────┬────────────┘ + ▼ (turn boundary) + ┌──────────────┐ + │ ACP Turn │ + │ (2 msgs │ + │ batched) │ + └──────┬───────┘ + ▼ + Response 2 + (addresses both) + +All senders share one buffer → one batched turn → one response. +``` + +### per-lane + +``` +Time ──────────────────────────────────────────────────────► + +Alice: "hi" Bob: "hey" Alice: "also this" + │ │ │ + ▼ │ │ +┌──────────┐ │ │ +│ ACP Turn │ (agent busy...) │ +│ (Alice) │ │ │ +└────┬─────┘ ▼ ▼ + │ ┌─────────────┐ ┌──────────────────┐ + ▼ │ Bob's lane │ │ Alice's lane │ +Response 1 │ → "hey" │ │ → "also this" │ + └──────┬──────┘ └────────┬─────────┘ + ▼ │ + ┌──────────────┐ │ (waits for Bob's turn) + │ ACP Turn │ │ + │ (Bob, 1msg) │ │ + └──────┬───────┘ │ + ▼ ▼ + Response 2 ┌──────────────┐ + (for Bob) │ ACP Turn │ + │ (Alice,1msg)│ + └──────┬───────┘ + ▼ + Response 3 + (for Alice) + +Each sender gets their own lane → own turn → own response. No silent drop. +``` + +### Batching internals (consumer loop) + +``` + ┌─────────────────────────────────────┐ + │ Dispatcher (per thread) │ + │ │ + submit(msg) ─────► mpsc channel (cap = max_buffered) │ + │ │ │ + │ ▼ │ + │ ┌─────────────────────────┐ │ + │ │ consumer_loop │ │ + │ │ │ │ + │ │ 1. Block on first msg │ │ ← I1: zero latency + │ │ (or idle timeout) │ │ + │ │ │ │ + │ │ 2. Greedy drain: │ │ + │ │ while try_recv() │ │ + │ │ && count < cap │ │ + │ │ && tokens < max │ │ + │ │ │ │ + │ │ 3. Pack batch: │ │ + │ │ [sender_ctx + msg] │ │ + │ │ [sender_ctx + msg] │ │ + │ │ ... │ │ + │ │ │ │ + │ │ 4. stream_prompt_blocks│ │ ← I2: one turn at a time + │ │ (shared session) │ │ + │ │ │ │ + │ │ 5. 
 
 ## Defaults

From b07804617cf24cfe3591f14ebf647b9052b29cb7 Mon Sep 17 00:00:00 2001
From: chaodu-agent
Date: Tue, 5 May 2026 20:12:22 +0000
Subject: [PATCH 31/32] docs: clarify per-message is the default behavior

---
 docs/message-dispatch-modes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/message-dispatch-modes.md b/docs/message-dispatch-modes.md
index 9bb803f3..d658c067 100644
--- a/docs/message-dispatch-modes.md
+++ b/docs/message-dispatch-modes.md
@@ -6,7 +6,7 @@ OpenAB supports three message dispatch modes that control how incoming messages
 
 ### `per-message` (default)
 
-Each message triggers its own ACP turn. This is the v0.8.2 behavior — simple, predictable, no batching.
+Each message triggers its own ACP turn. This is the default behavior — simple, predictable, no batching. Existing deployments use this mode automatically without any configuration change.
 
 **Use when:**
 - Single-user threads (most common case)
 - Low message volume
 - You want the simplest mental model (1 message = 1 agent response)

From c324989d35d885fb689c6212f01aa9f8674ac3e9 Mon Sep 17 00:00:00 2001
From: chaodu-agent
Date: Tue, 5 May 2026 20:13:44 +0000
Subject: [PATCH 32/32] docs(dispatch): add explicit pros/cons and comparison table for each mode

---
 docs/message-dispatch-modes.md | 53 ++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/docs/message-dispatch-modes.md b/docs/message-dispatch-modes.md
index d658c067..2e970230 100644
--- a/docs/message-dispatch-modes.md
+++ b/docs/message-dispatch-modes.md
@@ -8,34 +8,55 @@ Each message triggers its own ACP turn. This is the default behavior — simple, predictable, no batching. Existing deployments use this mode automatically without any configuration change.
 
-**Use when:**
-- Single-user threads (most common case)
-- Low message volume
-- You want the simplest mental model (1 message = 1 agent response)
+**Pros:**
+- Zero added latency — every message dispatches immediately
+- Simplest mental model (1 message = 1 agent response)
+- No configuration needed
+- No risk of messages being grouped with unrelated context
 
-**Trade-off:** If a user sends 3 messages quickly while the agent is processing, each becomes a separate turn — the agent sees them independently and responds 3 times.
+**Cons:**
+- Redundant context: if a user sends 3 messages quickly, the agent loads full context 3 times
+- Higher token cost under burst traffic (N messages = N full context windows)
+- Agent responds to each message independently — can't synthesize a coherent answer across rapid-fire messages
 
 ### `per-thread`
 
 All messages in a thread share one buffer. Messages that arrive while the agent is processing are batched into a single ACP turn at the next turn boundary.
 
-**Use when:**
-- High-traffic threads where multiple users address the bot simultaneously
-- You want to minimize token cost (one context window serves N messages)
-- The agent is expected to address all senders in a single reply
+**Pros:**
+- Lowest token cost — one context window serves N messages
+- Agent sees the full picture and can synthesize a single coherent response
+- Reduces "sorry I already answered that" noise from sequential turns
 
-**Trade-off:** If Alice and Bob both message in the same thread, they share one batch. The agent gets one turn to address both — if it only replies to Alice, Bob's message is effectively "silent-dropped" (delivered but not addressed).
+**Cons:**
+- Silent-drop risk: if Alice and Bob both message, the agent may only address Alice
+- Single response must cover all senders — harder for the agent to structure
+- Slight latency for messages 2..N (they wait for the current turn to finish)
 
 ### `per-lane`
 
 Each (thread, sender) pair gets its own buffer. Messages from the same sender batch together, but different senders get independent ACP turns.
 
-**Use when:**
-- Multi-user threads where each sender expects a dedicated response
-- Multi-agent collaboration (bot-to-bot threads)
-- You want batching benefits without silent-drop risk
+**Pros:**
+- No silent-drop risk — every sender gets their own dedicated turn and response
+- Same-sender batching still reduces redundant context (rapid-fire messages consolidated)
+- Best for multi-agent collaboration (each bot gets its own turn)
+
+**Cons:**
+- Higher token cost than `per-thread` (one turn per active sender per boundary)
+- Turns serialize through the shared session — sender B waits for sender A's turn to complete
+- More complex to reason about (ordering depends on arrival time)
+
+## Comparison Table
+
+| | `per-message` | `per-thread` | `per-lane` |
+|---|---|---|---|
+| Token cost | Highest (N turns) | Lowest (1 turn) | Medium (1 per sender) |
+| Latency | Zero | Zero for first message, then turn boundary | Zero for first message, then turn boundary |
+| Silent-drop risk | None | Yes (multi-sender) | None |
+| Rapid-fire handling | Each separate | All batched | Same-sender batched |
+| Best for | Single user, low traffic | Cost-sensitive, single-reply | Multi-user, multi-bot |
+| Configuration | None (default) | Opt-in | Opt-in |
 
 ## Decision Guide