From a5dd39857fb6564a4521e0e2926668e757451eac Mon Sep 17 00:00:00 2001 From: Prachi Saxena Date: Tue, 3 Feb 2026 00:25:32 +0530 Subject: [PATCH] Implemented intelligent interruption logic and fixed python dependencies --- SUBMISSION.md | 25 +++++++++++++ examples/smart_agent.py | 80 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 SUBMISSION.md create mode 100644 examples/smart_agent.py diff --git a/SUBMISSION.md b/SUBMISSION.md new file mode 100644 index 0000000000..2d497d9c63 --- /dev/null +++ b/SUBMISSION.md @@ -0,0 +1,25 @@ +# Intelligent Interruption Handling Agent + +## Overview +This agent implements smart interruption logic using LiveKit Agents. It solves the "VAD Sensitivity" problem by filtering user speech before stopping the agent. + +## Logic Implementation +The agent listens to the `user_speech_committed` event and applies the following decision matrix: +1. **Passive Backchanneling:** If the user says words like *"yeah", "uh-huh", "ok"* (defined in `IGNORE_WORDS`), the agent **ignores** them and continues speaking. +2. **Active Interruption:** If the user says command words like *"stop", "wait"* (defined in `INTERRUPT_WORDS`), the agent **interrupts** immediately. +3. **New Input:** Any other sentence triggers a standard interruption. + +## How to Run +1. **Install Dependencies:** + ```bash + pip install livekit-agents livekit-plugins-openai livekit-plugins-silero python-dotenv + ``` +2. **Set Environment Variables:** + Create a `.env` file with `LIVEKIT_URL`, `LIVEKIT_API_KEY`, `LIVEKIT_API_SECRET`, and `OPENAI_API_KEY`. +3. **Run the Agent:** + ```bash + python examples/smart_agent.py dev + ``` + +## Python Version +This project uses **Python 3.12** (via a virtual environment) to ensure compatibility with `livekit-agents` v0.8+. 
# Source: examples/smart_agent.py (added as a new file, mode 100644, by this patch).
"""Voice agent that filters user backchannel before interrupting its own speech.

While the agent is speaking, each committed user utterance is classified:

* only filler words from ``IGNORE_WORDS`` (or an empty transcript): ignored;
* contains a word/phrase from ``INTERRUPT_WORDS``: the agent is interrupted;
* anything else: treated as new input, the agent is interrupted.

Utterances received while the agent is not speaking are only logged.
"""
import asyncio
import inspect
import logging
import re

from dotenv import load_dotenv
from livekit.agents import (
    AutoSubscribe,
    JobContext,
    WorkerOptions,
    cli,
)
# NOTE(review): the original comment stated this import is guaranteed to work
# on livekit-agents v0.8.3 -- confirm against the pinned package version.
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import openai, silero

load_dotenv()
logger = logging.getLogger("smart-agent")

IGNORE_WORDS = {"yeah", "ok", "hmm", "uh-huh", "right", "yep", "okay"}
INTERRUPT_WORDS = {"stop", "wait", "no", "cancel", "hold on"}

# A token is a run of word characters/apostrophes, optionally joined by single
# hyphens, so "uh-huh" stays one token while ".", ",", "!" etc. are dropped.
_TOKEN_RE = re.compile(r"[\w']+(?:-[\w']+)*")


def _extract_text(msg) -> str:
    """Return the transcript carried by *msg* as a plain string.

    Objects without a ``content`` attribute are stringified. A ``content`` of
    ``None`` yields ``""``; a list/tuple keeps only its string parts.
    """
    if not hasattr(msg, "content"):
        return str(msg)
    content = msg.content
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        return " ".join(part for part in content if isinstance(part, str))
    return str(content)


def _tokenize(text: str) -> list[str]:
    """Split *text* into lowercase tokens with punctuation removed."""
    stripped = (tok.strip("'") for tok in _TOKEN_RE.findall(text.lower()))
    return [tok for tok in stripped if tok]


def _classify(tokens: list[str]) -> str:
    """Classify an utterance as ``"ignore"``, ``"command"`` or ``"input"``.

    ``"ignore"`` means every token is a filler word (this includes the empty
    utterance). ``"command"`` means a whole word or phrase from
    ``INTERRUPT_WORDS`` occurs. Everything else is ``"input"``.
    """
    if all(tok in IGNORE_WORDS for tok in tokens):
        return "ignore"
    # Pad with spaces so multi-word phrases ("hold on") match on word
    # boundaries and "no" does not match inside "know".
    padded = f" {' '.join(tokens)} "
    if any(f" {phrase} " in padded for phrase in INTERRUPT_WORDS):
        return "command"
    return "input"


def prewarm_fnc(proc):
    """Load the Silero VAD once per process and store it in ``proc.userdata``."""
    proc.userdata["vad"] = silero.VAD.load()


async def entrypoint(ctx: JobContext):
    """Connect to the room, start the voice agent and install the filter.

    Args:
        ctx: Job context supplied by the worker; provides the room and the
            process userdata holding the preloaded VAD.
    """
    logger.info("-> Connecting to room: %s", ctx.room.name)
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
    logger.info("-> Connected! Waiting for user audio...")

    agent = VoicePipelineAgent(
        vad=ctx.proc.userdata["vad"],
        stt=openai.STT(),
        llm=openai.LLM(model="gpt-4o-mini"),
        tts=openai.TTS(),
    )

    # Strong references to in-flight interrupt tasks: the event loop only
    # keeps weak references, so an unreferenced task may be garbage-collected
    # before it finishes.
    pending_interrupts: set = set()

    def _on_interrupt_done(task) -> None:
        """Drop the finished task and surface any failure in the log."""
        pending_interrupts.discard(task)
        if not task.cancelled() and task.exception() is not None:
            logger.error("agent.interrupt() failed", exc_info=task.exception())

    def _request_interrupt() -> None:
        """Interrupt the agent whether ``interrupt()`` is sync or async."""
        result = agent.interrupt()
        # A synchronous interrupt() has already done its work and returned a
        # non-awaitable; only schedule when there is something to await.
        if inspect.isawaitable(result):
            task = asyncio.ensure_future(result)
            pending_interrupts.add(task)
            task.add_done_callback(_on_interrupt_done)

    # --- INTERRUPTION LOGIC ---
    # NOTE(review): presumably this event fires only after the pipeline has
    # committed the user's turn; verify that the agent's built-in VAD
    # interruption has not already stopped playback by then.
    @agent.on("user_speech_committed")
    def on_user_speech(msg):
        """Decide whether a committed user utterance should interrupt."""
        user_text = _extract_text(msg).lower().strip()
        logger.info("✅ User said: '%s'", user_text)

        # NOTE(review): assumes the agent exposes `speech_handle` with an
        # `interrupted` flag -- confirm against the installed version.
        handle = agent.speech_handle
        is_speaking = bool(handle) and not handle.interrupted
        if not is_speaking:
            return

        verdict = _classify(_tokenize(user_text))

        # Logic A: filler words only (or nothing intelligible) -> keep talking.
        if verdict == "ignore":
            logger.info(" -> 🛡️ IGNORING backchannel (Agent continues)")
            return

        # Logic B: explicit command / Logic C: any other new input.
        if verdict == "command":
            logger.info(" -> 🛑 INTERRUPTING (Command detected)")
        else:
            logger.info(" -> 🛑 INTERRUPTING (New input)")
        _request_interrupt()

    agent.start(ctx.room)
    await agent.say("I am online and ready. I am running on the golden version.")


if __name__ == "__main__":
    cli.run_app(
        WorkerOptions(
            entrypoint_fnc=entrypoint,
            prewarm_fnc=prewarm_fnc,
        ),
    )