104 changes: 104 additions & 0 deletions CHANGES.md
@@ -0,0 +1,104 @@
# LiveKit Intelligent Interruption Handler

This implementation adds intelligent backchannel filtering to LiveKit voice agents. The agent can now distinguish between passive acknowledgements ("yeah", "mhmm", "okay") and actual interruption commands ("stop", "wait", "no").

## Problem Solved

When users provide backchannel feedback while the agent is speaking, the default VAD-based turn handling interrupts the agent, producing a choppy conversation experience. With this change:

- **Agent is speaking + user says "mhmm"** → Agent continues seamlessly
- **Agent is speaking + user says "stop"** → Agent stops immediately
- **Agent is silent + user says "yeah"** → Agent responds normally

## Quick Start

```bash
# Install dependencies
cd livekit-agents
pip install -e .

# Set up environment variables
export LIVEKIT_URL="wss://your-livekit-server"
export LIVEKIT_API_KEY="your-api-key"
export LIVEKIT_API_SECRET="your-api-secret"
export OPENAI_API_KEY="your-openai-key"
export DEEPGRAM_API_KEY="your-deepgram-key"

# Run the demo agent
cd examples/voice_agents
python interrupt_demo.py dev
```

Then connect via [LiveKit Playground](https://agents-playground.livekit.io/).

## How It Works

### Architecture

```
User Speech → VAD → STT → InterruptionFilter → Agent Response
                                │
                                ├─ Agent speaking? Filter backchannels
                                └─ Agent silent?   Allow all input
```

### Key Components

1. **InterruptionFilter** (`livekit/agents/voice/interruption_filter.py`)
   - Core filtering logic with configurable word lists
   - `should_interrupt(transcript, agent_state)` returns `True`/`False`

2. **Word Lists** (configurable)
   - `DEFAULT_BACKCHANNEL_WORDS`: yeah, ok, mhmm, uh-huh, right, sure, etc.
   - `DEFAULT_INTERRUPT_WORDS`: stop, wait, no, actually, hold on, etc.

3. **Integration Points**
   - `agent_activity.py`: Captures agent state, applies filter
   - `audio_recognition.py`: Skips end-of-utterance (EOU) detection for filtered utterances
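The decision logic above can be sketched as follows. This is a simplified, hypothetical version of `should_interrupt` — the actual code in `interruption_filter.py` may differ in its normalization and phrase matching:

```python
import re

DEFAULT_BACKCHANNEL_WORDS = {"yeah", "ok", "okay", "mhmm", "uh-huh", "right", "sure"}
DEFAULT_INTERRUPT_WORDS = {"stop", "wait", "no", "actually", "hold on"}


def should_interrupt(transcript: str, agent_state: str) -> bool:
    """Return True if the utterance should interrupt the agent."""
    if agent_state != "speaking":
        return True  # agent is silent: process all input normally
    tokens = [w.strip(".,!?") for w in transcript.lower().split()]
    text = " ".join(tokens)
    # Interrupt commands win even in mixed input ("yeah but wait")
    if any(re.search(rf"\b{re.escape(cmd)}\b", text) for cmd in DEFAULT_INTERRUPT_WORDS):
        return True
    if tokens and all(t in DEFAULT_BACKCHANNEL_WORDS for t in tokens):
        return False  # pure backchannel: let the agent keep speaking
    return True  # unknown content while speaking: safe default is to interrupt
```

Note the ordering: interrupt words are checked before backchannel words, which is what makes mixed utterances like "yeah but wait" stop the agent.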

### Configuration

```python
from livekit.agents.voice import InterruptionFilterConfig

# Custom configuration
config = InterruptionFilterConfig(
    backchannel_words={"yeah", "ok", "mhmm"},
    interrupt_words={"stop", "wait"},
    enabled=True,
)
```

## Files Changed

| File | Description |
|------|-------------|
| `interruption_filter.py` | NEW - Core filter logic |
| `agent_activity.py` | State tracking and filter integration |
| `audio_recognition.py` | Skip EOU for filtered utterances |
| `agent_session.py` | Configuration options |
| `__init__.py` | Public API exports |

## Testing

```bash
# Run unit tests from the project root
python -m pytest livekit-agents/tests/test_interruption_filter.py -v
```

## Proof of Functionality

See the `proof/` folder for:
- `transcript.txt` - Annotated conversation transcript
- Screen recording demonstrating the feature

## Key Design Decisions

1. **State Capture Timing**: Agent state is captured when user starts speaking, not when filter runs. This handles race conditions between VAD and STT.

2. **EOU Detection Skip**: When backchannel is filtered, End-of-Utterance detection is also skipped to prevent new response generation.

3. **Flexible Matching**: Hyphenated words like "uh-huh" match "uh-huh", "uh huh", and "uhhuh" to handle STT variations.

4. **Safe Default**: Unknown words while agent is speaking trigger interruption (could be important).
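Design decision 3 can be illustrated with a small normalization helper. This is a hypothetical sketch of the idea, not the exact code in `interruption_filter.py`:

```python
def hyphen_variants(word: str) -> set[str]:
    # Expand "uh-huh" into the forms an STT engine might emit
    forms = {word}
    if "-" in word:
        forms.add(word.replace("-", " "))  # "uh huh"
        forms.add(word.replace("-", ""))   # "uhhuh"
    return forms


def is_known_backchannel(transcript: str, backchannel_words: set[str]) -> bool:
    expanded = set().union(*(hyphen_variants(w) for w in backchannel_words))
    normalized = " ".join(w.strip(".,!?") for w in transcript.lower().split())
    # Match the whole phrase, or require every token to be a known form
    return normalized in expanded or (
        bool(normalized) and all(t in expanded for t in normalized.split())
    )
```

Expanding the configured words once, rather than normalizing each transcript against every spelling, keeps the per-utterance check cheap.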
169 changes: 169 additions & 0 deletions examples/voice_agents/interrupt_demo.py
@@ -0,0 +1,169 @@
# Intelligent Interruption Handling Demo Agent
#
# This example demonstrates the intelligent interruption handling feature that
# distinguishes between passive acknowledgements (backchanneling) and actual
# interruptions during voice conversations.
#
# When the agent is speaking:
# - "yeah", "ok", "hmm" → Agent continues speaking (backchannel)
# - "stop", "wait", "no" → Agent stops immediately (interruption)
# - "yeah but wait" → Agent stops (mixed input with interrupt word)
#
# When the agent is silent:
# - All user input is processed normally, including backchannel words
#
# Prerequisites:
# 1. Set environment variables:
# - LIVEKIT_URL (e.g., wss://your-project.livekit.cloud)
# - LIVEKIT_API_KEY
# - LIVEKIT_API_SECRET
# - DEEPGRAM_API_KEY (or other STT provider)
# - OPENAI_API_KEY (or other LLM provider)
#
# 2. Get a free LiveKit Cloud account at: https://cloud.livekit.io
# 3. Get a free Deepgram account at: https://console.deepgram.com
#
# Running the demo:
# python interrupt_demo.py dev
#
# Then connect via LiveKit Agents Playground:
# https://agents-playground.livekit.io/

import logging

from dotenv import load_dotenv

from livekit.agents import (
    Agent,
    AgentServer,
    AgentSession,
    JobContext,
    JobProcess,
    RunContext,
    cli,
    room_io,
)
from livekit.agents.llm import function_tool
from livekit.plugins import silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel

logger = logging.getLogger("interrupt-demo")

load_dotenv()


class DemoAgent(Agent):
    """Demo agent for testing intelligent interruption handling.

    This agent tells long stories when asked, making it easy to test
    whether backchanneling words trigger interruptions.
    """

    def __init__(self) -> None:
        super().__init__(
            instructions="""You are a friendly storyteller named Alex.

            Your job is to help demonstrate the intelligent interruption handling feature.
            When asked for a story, tell a LONG, engaging story (at least 3-4 paragraphs).

            When the user says things like "yeah", "ok", "uh-huh", or "hmm" while you're
            speaking, these are just acknowledgements - keep talking!

            But if they say "stop", "wait", "hold on", or "actually" - stop and listen.

            Keep your responses conversational but long enough to test interruptions.
            Do not use emojis or special characters. Speak naturally.""",
        )

    async def on_enter(self):
        """Greet the user when the session starts."""
        self.session.generate_reply(
            instructions="Greet the user and tell them you're here to tell stories. "
            "Ask if they'd like to hear a story. Keep it brief - just 1-2 sentences."
        )

    @function_tool
    async def tell_story(self, context: RunContext, topic: str = "adventure"):
        """Tell a story about a given topic.

        Args:
            topic: The topic or theme for the story
        """
        logger.info(f"Telling a story about: {topic}")
        return f"Tell a long, engaging story about {topic}. Make it at least 3-4 paragraphs."


server = AgentServer()


def prewarm(proc: JobProcess):
    """Prewarm the VAD model for faster startup."""
    proc.userdata["vad"] = silero.VAD.load()


server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Entry point for the voice agent session."""
    ctx.log_context_fields = {
        "room": ctx.room.name,
    }

    # Create session with intelligent interruption handling enabled
    session = AgentSession(
        # Speech-to-text - Deepgram Nova 3 provides fast, accurate transcription
        stt="deepgram/nova-3",

        # LLM - GPT-4.1-mini is fast and capable for storytelling
        llm="openai/gpt-4.1-mini",

        # Text-to-speech - Cartesia Sonic 2 for natural speech
        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",

        # Turn detection
        turn_detection=MultilingualModel(),
        vad=ctx.proc.userdata["vad"],

        # Enable preemptive generation for lower latency
        preemptive_generation=True,

        # Enable false interruption resumption
        resume_false_interruption=True,

        # =================================================================
        # INTELLIGENT INTERRUPTION HANDLING - The feature being demonstrated
        # =================================================================

        # Enable the interruption filter (default: True)
        interruption_filter_enabled=True,

        # Optional: Custom backchannel words to ignore when agent is speaking
        # Uncomment to customize:
        # backchannel_words={
        #     "yeah", "yes", "yep", "ok", "okay",
        #     "hmm", "mhm", "uh-huh", "right", "sure",
        # },

        # Optional: Custom words that always trigger interruption
        # Uncomment to customize:
        # interrupt_words={
        #     "stop", "wait", "hold on", "pause", "no",
        #     "actually", "but", "however",
        # },
    )

    logger.info("Starting session with intelligent interruption handling enabled")

    await session.start(
        agent=DemoAgent(),
        room=ctx.room,
        room_options=room_io.RoomOptions(
            audio_input=room_io.AudioInputOptions(),
        ),
    )


if __name__ == "__main__":
    cli.run_app(server)
32 changes: 24 additions & 8 deletions livekit-agents/livekit/agents/telemetry/traces.py
@@ -15,13 +15,29 @@
from opentelemetry.exporter.otlp.proto.http import Compression
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk._logs import (
    LogData,
    LoggerProvider,
    LoggingHandler,
    LogRecord,
    LogRecordProcessor,
)
try:
    from opentelemetry.sdk._logs import (
        LogData,
        LoggerProvider,
        LoggingHandler,
        LogRecord,
        LogRecordProcessor,
    )
except ImportError:
    # Compatibility with newer opentelemetry-sdk versions where LogData was removed
    from opentelemetry.sdk._logs import (
        LoggerProvider,
        LoggingHandler,
        LogRecordProcessor,
        ReadableLogRecord as LogRecord,
    )

    # Create a LogData-like class for compatibility
    from dataclasses import dataclass

    @dataclass
    class LogData:
        log_record: Any
        instrumentation_scope: Any

from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry.sdk.trace import SpanProcessor, TracerProvider
@@ -380,4 +396,4 @@ def _log(
    async with http_session.post(url, data=mp, headers=headers) as resp:
        resp.raise_for_status()

    logger.debug("finished uploading")
12 changes: 11 additions & 1 deletion livekit-agents/livekit/agents/voice/__init__.py
@@ -1,6 +1,12 @@
from . import io, run_result
from .agent import Agent, AgentTask, ModelSettings
from .agent_session import AgentSession, VoiceActivityVideoSampler
from .interruption_filter import (
    InterruptionFilter,
    InterruptionFilterConfig,
    DEFAULT_BACKCHANNEL_WORDS,
    DEFAULT_INTERRUPT_WORDS,
)
from .events import (
    AgentEvent,
    AgentFalseInterruptionEvent,
@@ -45,6 +51,10 @@
    "FunctionToolsExecutedEvent",
    "AgentFalseInterruptionEvent",
    "TranscriptSynchronizer",
    "InterruptionFilter",
    "InterruptionFilterConfig",
    "DEFAULT_BACKCHANNEL_WORDS",
    "DEFAULT_INTERRUPT_WORDS",
    "io",
    "room_io",
    "run_result",
@@ -60,4 +70,4 @@
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False