diff --git a/backend/agents/avatar_director.py b/backend/agents/avatar_director.py
index f9a9fc8..447be6b 100644
--- a/backend/agents/avatar_director.py
+++ b/backend/agents/avatar_director.py
@@ -67,6 +67,20 @@
 IDLE_ROTATE_MIN_MS = 8_000   # tighter cadence so demo viewer sees variety quickly
 IDLE_ROTATE_MAX_MS = 18_000
 
+# Set of `emitted_by` values that the Director treats as autonomous Tier 1
+# emits — they DO NOT claim the Tier 1 layer (no busy_until update) and are
+# themselves SUPPRESSED while a deliberate Tier 1 owns the layer. Anything
+# not in this set (play_*, dispatch_*, reading_chat, etc.) is considered
+# deliberate and claims Tier 1 for its ttl. Single source of truth so we
+# never mis-categorise an emitter at a call site.
+_IDLE_TIER1_EMITTERS: frozenset[str] = frozenset({
+    "idle_init",            # bootstrap (Tier 0; included for symmetry)
+    "idle_rotate",          # Tier 0 idle rotation
+    "idle_interjection",    # Tier 1 random interjection (sip / glance / walk)
+    "motivated_idle.thinking",  # Tier 0 swap to thinking pose
+    "motivated_idle.sip",   # post-response motivated sip
+})
+
 # Tier 0 = looping idle clips. Each one is symmetric (boomerangable) and
 # meant to play indefinitely under the reactive layer. Director rotates
 # between them every 12-30s.
@@ -116,9 +130,15 @@
 # glance every 12s reads as "she keeps getting distracted by the same
 # thing." One-shot per rotation is the right cadence.
 TIER1_INTERJECTIONS: list[tuple[str, str, float, str]] = [
-    ("misc_sip_drink",       "/states/idle/misc_sip_drink.mp4",            0.45, ""),
-    ("misc_walk_off_return", "/states/idle/misc_walk_off_return.mp4",      0.25, ""),
-    ("misc_glance_aside",    "/states/idle/misc_glance_aside_speaking.mp4",0.30, ""),
+    ("misc_sip_drink",       "/states/idle/misc_sip_drink.mp4",          0.45, ""),
+    ("misc_walk_off_return", "/states/idle/misc_walk_off_return.mp4",    0.25, ""),
+    # Use the EXPLICIT _silent.mp4 variant. The previous URL pointed at
+    # the *_speaking.mp4 file (rendered for Wav2Lip overlay) which has
+    # visible mouth movement; played muted as an idle interjection it
+    # reads as the avatar silently mouthing words — uncanny. The silent
+    # render at veo_silent_idle_renders.py was made specifically for
+    # this idle-rotation context; mouth stays closed.
+    ("misc_glance_aside",    "/states/idle/misc_glance_aside_silent.mp4",0.30, ""),
 ]
 
 # Probability per idle-rotation tick that we play a Tier 1 interjection
@@ -163,6 +183,22 @@ def __init__(self, broadcast: Callable[[dict], Awaitable[None]]):
         self._voice_state: str | None = None
         self._thinking_task: asyncio.Task | None = None
         self._last_sip_at: float = 0.0
+        # Processing-chain bookkeeping for play_processing's two-clip
+        # narrative (walk_off_return → processing.mp4). Each call increments
+        # the id; the queued processing.mp4 emit checks the id at fire time
+        # to detect supersession (back-to-back upload, pitch arriving early,
+        # etc.) and skip cleanly. dispatch_audio_first_pitch bumps this so
+        # an early pitch cancels the queued bridge tail.
+        self._processing_chain_id: int = 0
+        # Tier 1 busy horizon (monotonic seconds). Set automatically by
+        # emit() whenever a deliberate Tier 1 fires (bridge / pitch /
+        # response / reading_chat / listening_attentive / processing /
+        # fetching), using ttl_ms (or a 60s default for looped clips).
+        # _idle_loop's interjection branch checks this every tick and
+        # skips the random misc_* emit while a deliberate clip owns the
+        # layer — kills the "she glances aside silently DURING the
+        # processing.mp4 readback" overlap class. Cleared by fade_to_idle.
+        self._tier1_busy_until: float = 0.0
 
     def current_substrate_pod_path(self) -> str:
         """The Wav2Lip server-side path that matches the visible Tier 0 clip,
@@ -238,6 +274,29 @@ async def emit(
         }
         self._last_intent[layer] = intent
         self._last_url[layer] = url
+        # Tier 1 busy-tracking. Any deliberate Tier 1 emit (anything not
+        # produced by the autonomous idle rotation or the explicit
+        # fade-to-idle release) extends the busy horizon to ttl_ms in the
+        # future (or 60 s for looped pitches that have no natural end and
+        # rely on dispatch_audio_first_pitch's _release task to call
+        # fade_to_idle when audio ends). _idle_loop checks this every
+        # tick and skips the random Tier 1 interjection branch while a
+        # deliberate clip owns the layer — prevents the silent-glance /
+        # sip / walk-off rotation from overlaying pitch / processing /
+        # response renders.
+        if layer == "tier1":
+            if emitted_by in _IDLE_TIER1_EMITTERS:
+                pass  # autonomous emit — don't claim the layer
+            elif emitted_by == "fade_to_idle":
+                # The release sentinel — explicitly clear the horizon so
+                # idle rotation can resume at the very next tick.
+                self._tier1_busy_until = 0.0
+            else:
+                if loop:
+                    busy_for_s = 60.0  # cleared by fade_to_idle
+                else:
+                    busy_for_s = (ttl_ms or expected_duration_ms or 8_000) / 1000
+                self._tier1_busy_until = time.monotonic() + busy_for_s
         logger.info("[director] emit %s/%s -> %s (mode=%s fade=%dms muted=%s dur=%s)",
                     layer, intent, url, mode, fade_ms, muted, expected_duration_ms)
         try:
@@ -287,38 +346,85 @@ async def reading_chat(self) -> None:
     async def play_processing(self) -> None:
         """Tier 1 ambient cover for the upload→pitch processing window.
 
-        Plays the ~14 s "she picks up a printed spec sheet, reads it, then
-        sets it down + settles into anchor pose" Veo clip. Maps believably
-        to "the AI is reviewing your product" for the audience while Gemma
-        + rembg + TTS + Wav2Lip churn in the background — bridges the
-        otherwise-dead 5-15 s gap between upload landing and pitch starting.
-
-        Crossfade behaviour:
-        - If the pipeline finishes BEFORE the clip ends (~5-13 s typical
-          for cache-warm runs), `dispatch_audio_first_pitch` emits a new
-          Tier 1 (`pitch_veo`) which crossfades over this clip mid-readback
-          — feels like "she finished thinking and started speaking."
-        - If the pipeline finishes AFTER the clip ends (cold Wav2Lip,
-          large product video, etc.), the clip ends naturally and Tier 0
-          idle resumes underneath until the pitch crossfades in. Slight
-          "she put the paper down then thought for a sec" beat — still
-          reads as natural human pacing rather than a frozen avatar.
-
-        End frame is the canonical anchor pose (hands at waist, soft
-        smile, eye contact) so the pitch crossfade lands clean either
-        way — same target pose as the welcome clip + the Wav2Lip
+        Two-clip narrative chain:
+          1. walk_off_return (8.0 s) — emitted as intent="fetching" so the
+             HUD reads "she went to grab the item the operator just sent."
+             The audience reads the off-screen beat as "AI is fetching the
+             product the operator just dropped." She walks back into frame
+             at the tail.
+          2. processing.mp4 (14.13 s) — chained ~7.7 s after step 1 (300 ms
+             tail overlap so the dashboard crossfade hides the cut). She's
+             back in frame and now picks up a printed spec sheet, reads it,
+             sets it down. Maps to "AI is now reviewing what was just
+             handed to it."
+
+        Total bridge ≈ 22 s. Pipeline target is 10-15 s, so the pitch
+        usually crossfades over processing.mp4 mid-readback — feels like
+        "she finished reading and started speaking." If the pipeline
+        outlasts the bridge entirely (cold Wav2Lip + large video), the
+        clip ends naturally and Tier 0 idle resumes until the pitch
+        crossfades in.
+
+        Race handling — the queued processing.mp4 emit is gated on
+        `_processing_chain_id`. Bumped by:
+          - back-to-back call to play_processing (second upload before
+            the first finishes) — the older queued tail no-ops out.
+          - dispatch_audio_first_pitch — an early pitch cancels the
+            queued processing tail so the pitch isn't overlaid mid-read.
+
+        End frame of processing.mp4 is the canonical anchor pose (hands
+        at waist, soft smile, eye contact) so the pitch crossfade lands
+        clean — same target pose as the welcome clip + the Wav2Lip
         substrates. No special handoff logic needed.
         """
+        # No debounce needed — the route handler is the sole call site
+        # post-Option-A, so this fires exactly once per upload. Back-to-
+        # back uploads bump the chain_id and the queued processing.mp4
+        # tail of the older chain no-ops out at the supersession check.
+        self._processing_chain_id += 1
+        chain_id = self._processing_chain_id
+
+        # Step 1: walk-off-and-return. Intent label "fetching" surfaces in
+        # the dashboard HUD so it's obvious which narrative beat is on
+        # screen. ttl_ms = 8000 (probed) so the player knows when to
+        # expect the natural end if no follow-up arrives.
         await self.emit(
             "tier1",
-            "processing",
-            "/bridges/processing/processing.mp4",
+            "fetching",
+            "/states/idle/misc_walk_off_return.mp4",
             loop=False,
             mode="crossfade",
-            ttl_ms=14_130,
-            emitted_by="play_processing",
+            ttl_ms=8_000,
+            emitted_by="play_processing_fetch",
         )
 
+        # Step 2: schedule processing.mp4 as the second link. 7.7 s wait
+        # = walk_off duration (8.0) - 300 ms overlap. The 300 ms tail
+        # gives the Tier 1 crossfade enough room to hide the seam between
+        # her stepping back into frame and her picking up the paper.
+        async def _chain_processing(my_id: int) -> None:
+            """Emit processing.mp4 after the walk-off unless superseded."""
+            try:
+                await asyncio.sleep(7.7)
+            except asyncio.CancelledError:
+                return
+            if self._processing_chain_id != my_id:
+                # Superseded by another play_processing call or by a pitch
+                # dispatch. Don't overlay stale content.
+                logger.info("[director] processing chain superseded (id %d → %d), skip",
+                            my_id, self._processing_chain_id)
+                return
+            await self.emit(
+                "tier1", "processing", "/bridges/processing/processing.mp4",
+                loop=False, mode="crossfade", ttl_ms=14_130,
+                emitted_by="play_processing",
+            )
+
+        # Keep a reference to the task: asyncio documents that the event loop
+        # holds tasks only weakly, so a discarded create_task() result may be
+        # collected before it runs. An older, superseded tail no-ops anyway.
+        self._processing_chain_task = asyncio.create_task(_chain_processing(chain_id))
+
     async def play_bridge(self, label: str) -> dict[str, Any] | None:
         """Pick a bridge from the runtime LatentSync manifest and emit it.
         Returns the chosen entry or None if no bridge available."""
@@ -394,6 +500,13 @@ async def dispatch_audio_first_pitch(
         """
         url = video_url or _DEFAULT_PITCH_VIDEO_URL
 
+        # Cancel any pending play_processing tail. Bumping the chain id
+        # makes the queued processing.mp4 emit (still asyncio.sleep'ing in
+        # _chain_processing) no-op when it wakes — prevents the bridge
+        # from overlaying the pitch mid-speech if the pipeline finishes
+        # before the walk_off → processing handoff has happened.
+        self._processing_chain_id += 1
+
         # Tier 1 muted looping pose. Dashboard mutes the video element
         # (audio is owned by the standalone <audio>) and skips the
         # duration handshake because loop=True (no natural end).
@@ -542,7 +655,16 @@ async def _idle_loop(self) -> None:
                 # Tier 1 interjections (sip, walk-off) don't change the
                 # underlying Tier 0 substrate; the Director keeps the active
                 # idle pose paired with the next response.
-                if random.random() < INTERJECTION_PROBABILITY and TIER1_INTERJECTIONS:
+                #
+                # SUPPRESS while a deliberate Tier 1 (bridge / pitch /
+                # response / reading_chat / fetching / processing) is
+                # active — checked via _tier1_busy_until set in emit().
+                # When the deliberate horizon expires, we naturally fall
+                # through to either another Tier 1 interjection on the
+                # next tick or rotate Tier 0 below.
+                if (random.random() < INTERJECTION_PROBABILITY
+                        and TIER1_INTERJECTIONS
+                        and time.monotonic() >= self._tier1_busy_until):
                     pick = self._weighted_pick(TIER1_INTERJECTIONS)
                     if pick:
                         intent, url, _w, _pod = pick
@@ -685,15 +807,15 @@ def _schedule_sip_after(self, delay_ms: int) -> None:
         async def _fire():
             try:
                 await asyncio.sleep(delay_ms / 1000)
-                # Only sip if Tier 1 is currently idle (Tier 0 painting).
-                # If a new response is in flight we don't want to interrupt.
-                if self._last_intent.get("tier1") not in (
-                    "", "idle_release", "idle_init", "reading_chat",
-                    "listening_attentive", None,
-                ):
-                    # Keep the timestamp but skip the sip; another response
-                    # is on stage.
-                    logger.debug("[director] motivated sip skipped — tier1 active")
+                # Skip the sip if a deliberate Tier 1 currently owns the
+                # layer (pitch / response / processing / fetching /
+                # reading_chat / listening). Uses the same busy_until
+                # horizon as _idle_loop's interjection branch — single
+                # mechanism, consistent behaviour. Replaces the older
+                # explicit-intent skip-list which missed the new chain
+                # intents (fetching / processing).
+                if time.monotonic() < self._tier1_busy_until:
+                    logger.debug("[director] motivated sip skipped — tier1 busy")
                     return
                 logger.info("[director] motivated idle: sip_drink after %dms", delay_ms)
                 await self.emit(
diff --git a/backend/main.py b/backend/main.py
index 5e586d5..2dccd7a 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -466,23 +466,12 @@ async def run_sell_pipeline(frame_b64: str, voice_text: str,
     logger.info("=" * 60)
     pipeline_start = time.time()
 
-    # Tier 1 ambient cover for the entire upload→pitch processing window.
-    # 14s of avatar-picks-up-spec-sheet-and-reads-it footage that maps
-    # believably to "AI is reviewing your product" while Gemma + Wav2Lip
-    # do their work. Without this clip the avatar sits in mute Tier 0
-    # idle for 5-15s between upload and pitch start — reads as a frozen
-    # broadcast. dispatch_audio_first_pitch crossfades the pitch over
-    # this clip when ready (auto-cancels the natural-end fade); if the
-    # pipeline runs longer than 14s, the clip ends naturally and Tier 0
-    # resumes underneath until pitch lands. Voice state flips to
-    # `thinking` so the Spin3D rim light + voice pill both reflect the
-    # "AI is processing" moment in lockstep with the visual.
-    if director:
-        try:
-            await director.play_processing()
-            await director.set_voice_state("thinking")
-        except Exception:
-            logger.exception("[pipeline] play_processing emit failed (non-fatal)")
+    # NOTE: upload-bridge (walk_off → processing.mp4 chain) + voice_state
+    # are now fired by the route handler via _play_upload_bridge(), not
+    # here. Calling once at the route layer means the chain fires exactly
+    # once per upload regardless of which pipeline runs (run_sell_pipeline
+    # is also called from run_video_sell_pipeline and we previously got
+    # double walk-offs). See Option A in the avatar-director refactor.
 
     # PHASE 1: Product analysis + pitch script + background removal in parallel.
     #
@@ -839,10 +828,28 @@ async def api_analyze(file: UploadFile = File(...), voice_text: str = Form("sell
     return result
 
 
+async def _play_upload_bridge() -> None:
+    """Start the upload-bridge cover and flip voice state to 'thinking'.
+
+    Awaits director.play_processing() and then
+    director.set_voice_state("thinking"); a no-op when `director` is
+    falsy. Any exception is logged and swallowed so a failed emit never
+    stops the route handler from scheduling the pipeline. The route
+    handlers are the callers; the pipelines do not emit the bridge.
+    """
+    if director:
+        try:
+            await director.play_processing()
+            await director.set_voice_state("thinking")
+        except Exception:
+            logger.exception("[route] play_processing emit failed (non-fatal)")
+
+
 @app.post("/api/sell")
 async def api_sell(file: UploadFile = File(...), voice_text: str = Form("sell this")):
     contents = await file.read()
     frame_b64 = base64.b64encode(contents).decode()
+    await _play_upload_bridge()
     asyncio.ensure_future(run_sell_pipeline(frame_b64, voice_text))
     return {"status": "pipeline_started"}
 
@@ -888,6 +895,12 @@ async def api_sell_video(file: UploadFile = File(...), voice_text: str = Form("s
     # first checkmark light up without waiting for intake to start.
     asyncio.ensure_future(_emit_pipeline_step(request_id, "uploaded", "done",
                                                ms=0, detail=f"{len(contents)}B"))
+    # Bridge BEFORE pipeline kickoff so the avatar starts walking off
+    # within ~50 ms of the POST landing — closes the pre-intake dead-air
+    # gap (~3-5 s of Deepgram + frame extract). The pipelines no longer
+    # emit the bridge themselves. Awaited (not background-scheduled) so
+    # the broadcast actually flushes before the heavy work starts.
+    await _play_upload_bridge()
     asyncio.ensure_future(run_video_sell_pipeline(video_path, voice_text,
                                                    request_id=request_id))
     return {"status": "video_pipeline_started", "bytes": len(contents),
@@ -905,25 +918,10 @@ async def run_video_sell_pipeline(video_path: str, voice_text: str,
     logger.info("  voice_text: %s", voice_text[:100])
     logger.info("=" * 60)
 
-    # Fire the "she picks up a spec sheet and reads it" Tier 1 cover IMMEDIATELY
-    # — before intake (Deepgram + frame extraction + carousel build, ~3-5 s)
-    # even starts. Without this the avatar sits in dead idle for 3-5 s after
-    # the operator drops a video, then ANOTHER 5-15 s during Phase 1 Gemma
-    # before the pitch lands. Total dead-air budget was 8-20 s; firing here
-    # closes the 3-5 s pre-intake gap and keeps the audience visually
-    # engaged the entire way to the pitch crossfade.
-    #
-    # Also fires from run_sell_pipeline for the photo upload path
-    # (/api/sell). Calling here twice for video uploads is fine — the
-    # second emit is a no-op crossfade to the same URL (Director.emit
-    # logs it but the dashboard sees identical url + intent and doesn't
-    # re-prepare the video element).
-    if director:
-        try:
-            await director.play_processing()
-            await director.set_voice_state("thinking")
-        except Exception:
-            logger.exception("[video-pipeline] play_processing emit failed (non-fatal)")
+    # NOTE: upload-bridge (walk_off → processing.mp4) is fired by the
+    # /api/sell-video route handler via _play_upload_bridge(), not here.
+    # Single call site = no chain restart on the run_video → run_sell
+    # cascade. See Option A in the avatar-director refactor.
 
     log_event("SYSTEM", "Video received. Starting intake pipeline...")
     await _emit_pipeline_step(request_id, "deepgram", "active")
@@ -1832,8 +1830,28 @@ async def api_respond_to_comment(
     if director:
         bridge_task = asyncio.create_task(director.play_bridge(comment_type))
 
+    # Item 6 — match the pitch path: if active_language is non-English,
+    # translate the response_text first and pass language_code to
+    # ElevenLabs flash_v2_5 (multilingual). The translator caches every
+    # (text_hash, lang) tuple in sqlite so repeat answers are free; only
+    # the first time we ever speak a particular response in a new
+    # language costs one Claude Haiku call. Failure modes (Bedrock error,
+    # unknown lang) fall through to the original English text — see
+    # translator.translate() for the fallback contract.
+    active_lang = pipeline_state.get("active_language", "en")
+    tts_text = response_text
+    if active_lang != "en":
+        try:
+            tts_text = await translator.translate(response_text, active_lang)
+            if tts_text != response_text:
+                log_event("SELLER", f"Response translated to {active_lang} "
+                                    f"({len(response_text)} → {len(tts_text)} chars)")
+        except Exception as e:
+            logger.warning("[lang] response translate failed (%s) — falling back to English", e)
+            tts_text = response_text
+
     t0 = time.time()
-    audio_bytes = await text_to_speech(response_text)
+    audio_bytes = await text_to_speech(tts_text, language_code=active_lang)
     tts_ms = int((time.time() - t0) * 1000)
 
     # Best-effort: collect bridge result without blocking. If the manifest
@@ -2514,7 +2532,7 @@ async def dev_transitions() -> HTMLResponse:
 const T1 = [
   { intent: 'misc_sip_drink',       url: '/states/idle/misc_sip_drink.mp4',            weight: 0.40 },
   { intent: 'misc_walk_off_return', url: '/states/idle/misc_walk_off_return.mp4',      weight: 0.20 },
-  { intent: 'misc_glance_aside',    url: '/states/idle/misc_glance_aside_speaking.mp4',weight: 0.25 },
+  { intent: 'misc_glance_aside',    url: '/states/idle/misc_glance_aside_silent.mp4', weight: 0.25 },
   { intent: 'welcome',              url: '/bridges/welcome/welcome.mp4',               weight: 0.15 },
 ];
 
diff --git a/dashboard/src/App.jsx b/dashboard/src/App.jsx
index 49c4b3b..02787c7 100644
--- a/dashboard/src/App.jsx
+++ b/dashboard/src/App.jsx
@@ -2,6 +2,7 @@ import React, { useState } from 'react';
 import { useEmpireSocket } from './hooks/useEmpireSocket';
 import { TikTokShopOverlay } from './components/TikTokShopOverlay';
 import { StartDemoOverlay } from './components/StartDemoOverlay';
+import { LanguagePicker } from './components/LanguagePicker';
 
 /**
  * App — operator stage at /
@@ -35,6 +36,8 @@ export default function App() {
     liveStage, wsRef, connected,
     audioResponse, setAudioResponse, pitchAudio, setPitchAudio,
     view3d,
+    activeClips,
+    activeLanguage, setActiveLanguage,
   } = useEmpireSocket();
 
   const [dragging, setDragging] = useState(false);
@@ -114,6 +117,15 @@ export default function App() {
           Hint disappears the moment hasUploaded flips. */}
       {!hasUploaded && (
         <div style={styles.emptyHint}>
+          {/* Step 1: pick language. Big tappable tiles so the operator
+              knows ahead of the drop which language the avatar will
+              speak. Backend reads pipeline_state["active_language"] at
+              pitch-time, so picking AFTER drop is fine too — but we
+              surface it first so the demo flow is "pick → drop → speak". */}
+          <LanguagePicker
+            activeLanguage={activeLanguage}
+            onChange={setActiveLanguage}
+          />
           <span style={styles.emptyHintIcon}>📦</span>
           <p style={styles.emptyHintLabel}>Drop a product video to start</p>
           <p style={styles.emptyHintSub}>
@@ -168,6 +180,34 @@ export default function App() {
         </div>
       )}
 
+      {/* Post-upload language chip — stays reachable so the operator can
+          flip languages mid-stream (e.g. switch from English pitch to a
+          Spanish Q&A response if a viewer comments in Spanish). Mounted
+          outside the 9:16 phone silhouette so it doesn't clutter the
+          audience-facing surface. Hidden pre-upload because the full
+          picker is already centered in the empty-state hint. */}
+      {hasUploaded && (
+        <div style={styles.langChipSlot}>
+          <LanguagePicker
+            activeLanguage={activeLanguage}
+            onChange={setActiveLanguage}
+            compact
+          />
+        </div>
+      )}
+
+      {/* Debug HUD — top-left fixed pill showing the active Director clip
+          per layer. Always mounted (even pre-upload) so we can identify
+          idle-rotation clips in real time and catch any bad pool entries
+          (e.g., the misc_glance_aside_speaking.mp4 silent-mouthing bug
+          we just fixed). Tier 0 = always-on idle background, Tier 1 =
+          one-shot interjections / pitch / processing bridge. The intent
+          name lines up with the avatar_director.py library entries. */}
+      <div style={styles.clipHud}>
+        <ClipHudRow label="T0" clip={activeClips?.tier0} />
+        <ClipHudRow label="T1" clip={activeClips?.tier1} />
+      </div>
+
       {/* Tiny connection indicator — bottom-right corner. Only loud when
           DISCONNECTED so the operator knows when to refresh. CONNECTED
           state stays whisper-quiet (no green spam during a stable run). */}
@@ -190,6 +230,31 @@ export default function App() {
   );
 }
 
+// One row of the debug clip HUD: layer label (T0/T1), the clip's intent,
+// and the last '/'-separated segment of its url (the mp4 basename), so
+// the emitted clip can be identified at a glance. With no clip the row
+// is dimmed (opacity 0.35) and shows the '—' / 'idle' placeholders.
+function ClipHudRow({ label, clip }) {
+  const isActive = !!clip;
+  const filename = clip?.url ? clip.url.split('/').pop() : null;
+  // Alert styling fires when a clip whose filename contains '_speaking'
+  // is flagged muted — a speaking render playing silently is the exact
+  // bug class this HUD exists to catch. Loud red when caught.
+  const isSilentSpeak = isActive && filename?.includes('_speaking') && clip.muted;
+  return (
+    <div style={{
+      ...styles.clipHudRow,
+      ...(isSilentSpeak ? styles.clipHudRowAlert : null),
+      opacity: isActive ? 1 : 0.35,
+    }}>
+      <span style={styles.clipHudTier}>{label}</span>
+      <span style={styles.clipHudIntent}>{clip?.intent || '—'}</span>
+      <span style={styles.clipHudFile}>{filename || 'idle'}</span>
+      {clip?.muted && <span style={styles.clipHudMuted}>MUTED</span>}
+    </div>
+  );
+}
+
 const styles = {
   root: {
     position: 'fixed', inset: 0,
@@ -198,6 +263,45 @@ const styles = {
     color: '#fafafa',
     fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
   },
+  // Debug HUD — top-left, low chrome, monospace so filenames stay
+  // readable. Always visible regardless of hasUploaded.
+  clipHud: {
+    position: 'fixed', top: 14, left: 14, zIndex: 80,
+    display: 'flex', flexDirection: 'column', gap: 4,
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+    pointerEvents: 'none',
+  },
+  clipHudRow: {
+    display: 'flex', alignItems: 'center', gap: 8,
+    background: 'rgba(15,15,18,0.8)',
+    backdropFilter: 'blur(10px)',
+    border: '1px solid #27272a',
+    borderRadius: 6, padding: '4px 8px',
+    fontSize: 10, lineHeight: 1.2,
+    minWidth: 280,
+  },
+  clipHudRowAlert: {
+    border: '1px solid #ef4444',
+    background: 'rgba(127,29,29,0.65)',
+    boxShadow: '0 0 12px rgba(239,68,68,0.5)',
+  },
+  clipHudTier: {
+    fontWeight: 800, color: '#a1a1aa', letterSpacing: 1,
+    minWidth: 18,
+  },
+  clipHudIntent: {
+    fontWeight: 700, color: '#fafafa',
+    minWidth: 130,
+  },
+  clipHudFile: {
+    color: '#71717a', flex: 1,
+    overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap',
+  },
+  clipHudMuted: {
+    fontWeight: 800, color: '#fbbf24', fontSize: 9,
+    background: 'rgba(146,64,14,0.4)',
+    padding: '1px 4px', borderRadius: 3, letterSpacing: 1,
+  },
   // Pre-upload state — centered drop affordance over a black canvas.
   // Intentionally minimal: one icon, one prompt line, one tiny tech-stack
   // sub-line. No buttons (drag-drop is the only entry point), no logo,
@@ -252,6 +356,13 @@ const styles = {
     boxShadow: '0 4px 20px rgba(22,163,74,0.4)',
     backdropFilter: 'blur(8px)',
   },
+  // Top-right corner slot for the compact LanguagePicker chip. zIndex
+  // sits above the avatar's TikTok overlay (which uses zIndex up to 60
+  // for the LIVE pill / chat rail) so the popout grid isn't clipped by
+  // any 9:16-internal layer when the operator clicks to expand mid-stream.
+  langChipSlot: {
+    position: 'fixed', top: 14, right: 14, zIndex: 110,
+  },
   uploadPingDot: {
     width: 8, height: 8, borderRadius: 4, background: '#fff',
     animation: 'pulse 1s ease-in-out infinite',
diff --git a/dashboard/src/components/LanguagePicker.jsx b/dashboard/src/components/LanguagePicker.jsx
new file mode 100644
index 0000000..5090dee
--- /dev/null
+++ b/dashboard/src/components/LanguagePicker.jsx
@@ -0,0 +1,314 @@
+import React, { useEffect, useRef, useState } from 'react';
+
+/**
+ * LanguagePicker — operator-facing live-language selector.
+ *
+ * Surfaces the backend's `pipeline_state["active_language"]` as a tappable
+ * pill row (one tile per supported language). On change we POST
+ * `/api/live/language` with `lang=<code>`; the server flips its in-memory
+ * state and broadcasts a `language_changed` WS event so every connected
+ * dashboard stays in sync (useEmpireSocket subscribes + updates the
+ * `activeLanguage` it hands back to App).
+ *
+ * Flow once the operator picks a language:
+ *   1. POST /api/live/language → server updates pipeline_state, broadcasts.
+ *   2. Operator drops the product video.
+ *   3. run_video_sell_pipeline → run_sell_pipeline reads active_language
+ *      and feeds it into translator.translate(script, lang) +
+ *      text_to_speech(..., language_code=lang). ElevenLabs flash_v2_5 is
+ *      multilingual so the avatar speaks in the chosen language with the
+ *      same voice id.
+ *
+ * Two presentations driven by `compact`:
+ *   • compact=false (pre-upload) — a horizontal grid of six tiles with
+ *     flag + native label + ISO code, so the operator can pick before they
+ *     drop a video. Sits beneath the "Drop a product video" hint.
+ *   • compact=true (post-upload) — a single corner chip showing the active
+ *     flag + code; clicking it pops a small grid out so the language can
+ *     still be changed mid-stream without leaving the stage.
+ *
+ * Languages mirror agents/translator.py SUPPORTED. To add a 7th language,
+ * append a row to SUPPORTED + add a row here. (Same row order — the demo
+ * voice expects en first because that's the un-translated baseline.)
+ */
+export const LANGUAGES = [ // rows: [ISO code, native label, flag emoji]
+  ['en', 'English',  '🇺🇸'],
+  ['es', 'Español',  '🇪🇸'],
+  ['fr', 'Français', '🇫🇷'],
+  ['de', 'Deutsch',  '🇩🇪'],
+  ['zh', '中文',     '🇨🇳'],
+  ['tl', 'Tagalog',  '🇵🇭'],
+].map(([code, label, flag]) => ({ code, label, flag }));
+
+/**
+ * Live-language selector: a pick POSTs `lang=<code>` to /api/live/language
+ * and calls `onChange(code)` once it succeeds. `compact` swaps the tile row
+ * for a chip + popout; `disabled` blocks picks.
+ */
+export function LanguagePicker({
+  activeLanguage = 'en',
+  onChange,
+  compact = false,
+  disabled = false,
+}) {
+  const [pending, setPending] = useState(null);
+  const [expanded, setExpanded] = useState(false);
+  const containerRef = useRef(null);
+  const apiBase = `http://${window.location.hostname}:8000`;
+
+  // Auto-collapse the post-upload popout on any mousedown outside the
+  // picker — less intrusive than an explicit close button.
+  useEffect(() => {
+    if (!compact || !expanded) return;
+    function handleClick(e) {
+      if (containerRef.current && !containerRef.current.contains(e.target)) {
+        setExpanded(false);
+      }
+    }
+    document.addEventListener('mousedown', handleClick);
+    return () => document.removeEventListener('mousedown', handleClick);
+  }, [compact, expanded]);
+
+  async function pick(code) {
+    if (code === activeLanguage || pending || disabled) return;
+    setPending(code);
+    try {
+      const fd = new FormData();
+      fd.append('lang', code);
+      const r = await fetch(`${apiBase}/api/live/language`, {
+        method: 'POST',
+        body: fd,
+      });
+      if (!r.ok) throw new Error(`HTTP ${r.status}`);
+      // Server broadcasts language_changed over WS and useEmpireSocket
+      // updates activeLanguage from it (single source of truth); onChange
+      // also updates the local UI in case the WS round-trip lags.
+      onChange?.(code);
+      // Collapse only on success; after a failure the popout stays open.
+      if (compact) setExpanded(false);
+    } catch (e) {
+      console.warn('[language] set failed', e);
+    } finally {
+      setPending(null);
+    }
+  }
+
+  const active = LANGUAGES.find(l => l.code === activeLanguage) || LANGUAGES[0];
+
+  // Compact variant: collapsed chip + click-to-expand popout, used
+  // post-upload so the picker stays reachable without dominating the stage.
+  if (compact) {
+    return (
+      <div ref={containerRef} style={styles.compactRoot}>
+        <button
+          type="button"
+          onClick={() => setExpanded(v => !v)}
+          style={{
+            ...styles.compactChip,
+            ...(expanded ? styles.compactChipOpen : null),
+          }}
+          aria-label={`Language: ${active.label}. Click to change.`}
+          aria-expanded={expanded}
+        >
+          <span style={styles.compactFlag}>{active.flag}</span>
+          <span style={styles.compactCode}>{active.code.toUpperCase()}</span>
+          <span style={styles.compactCaret}>{expanded ? '▴' : '▾'}</span>
+        </button>
+
+        {expanded && (
+          <div style={styles.compactGrid}>
+            {LANGUAGES.map(lang => {
+              const isActive = lang.code === activeLanguage;
+              const isPending = lang.code === pending;
+              return (
+                <button
+                  type="button"
+                  key={lang.code}
+                  onClick={() => pick(lang.code)}
+                  disabled={isPending || disabled}
+                  style={{
+                    ...styles.compactTile,
+                    ...(isActive ? styles.compactTileActive : null),
+                    ...(isPending ? styles.tilePending : null),
+                  }}
+                  aria-pressed={isActive}
+                >
+                  <span style={styles.compactTileFlag}>{lang.flag}</span>
+                  <span style={styles.compactTileLabel}>{lang.label}</span>
+                </button>
+              );
+            })}
+          </div>
+        )}
+      </div>
+    );
+  }
+
+  // Full variant: six-tile "step 1: pick language" row shown pre-upload
+  // under the drop hint, styled in the empty-state's dim monospace chrome.
+  return (
+    <div style={styles.fullRoot}>
+      <p style={styles.fullCaption}>STEP 1 · LANGUAGE</p>
+      <div style={styles.fullGrid}>
+        {LANGUAGES.map(lang => {
+          const isActive = lang.code === activeLanguage;
+          const isPending = lang.code === pending;
+          return (
+            <button
+              type="button"
+              key={lang.code}
+              onClick={() => pick(lang.code)}
+              disabled={isPending || disabled}
+              style={{
+                ...styles.fullTile,
+                ...(isActive ? styles.fullTileActive : null),
+                ...(isPending ? styles.tilePending : null),
+              }}
+              aria-pressed={isActive}
+            >
+              <span style={styles.fullFlag}>{lang.flag}</span>
+              <span style={styles.fullLabel}>{lang.label}</span>
+              <span style={styles.fullCode}>{lang.code.toUpperCase()}</span>
+            </button>
+          );
+        })}
+      </div>
+      <p style={styles.fullSub}>
+        avatar will speak in {active.label.toLowerCase()} · same voice, translated script
+      </p>
+    </div>
+  );
+}
+
+const styles = {
+  // ── Full (pre-upload) ──────────────────────────────────────────────
+  fullRoot: {
+    display: 'flex', flexDirection: 'column',
+    alignItems: 'center', gap: 14,
+    pointerEvents: 'auto',
+  },
+  fullCaption: {
+    fontSize: 11, fontWeight: 700, letterSpacing: 2,
+    color: '#52525b', margin: 0,
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+    textTransform: 'uppercase',
+  },
+  fullGrid: {
+    display: 'flex', gap: 10, flexWrap: 'wrap',
+    justifyContent: 'center',
+  },
+  fullTile: {
+    display: 'flex', flexDirection: 'column',
+    alignItems: 'center', justifyContent: 'center',
+    gap: 4, minWidth: 84, padding: '12px 14px',
+    background: 'rgba(15,15,18,0.85)',
+    border: '1px solid #27272a',
+    borderRadius: 12,
+    color: '#a1a1aa',
+    cursor: 'pointer',
+    transition: 'transform 120ms ease, border-color 120ms ease, color 120ms ease, background 120ms ease',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+  },
+  fullTileActive: {
+    background: 'rgba(124,58,237,0.18)',
+    border: '1px solid rgba(167,139,250,0.85)', // full shorthand: mixing borderColor with base `border` breaks on toggle-off
+    color: '#fafafa',
+    transform: 'translateY(-1px)',
+    boxShadow: '0 6px 20px rgba(124,58,237,0.35), 0 0 0 1px rgba(167,139,250,0.4) inset',
+  },
+  fullFlag: {
+    fontSize: 28, lineHeight: 1,
+  },
+  fullLabel: {
+    fontSize: 11, fontWeight: 700, letterSpacing: 0.6,
+    fontFamily: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif',
+  },
+  fullCode: {
+    fontSize: 9, fontWeight: 800, letterSpacing: 1.4,
+    color: '#71717a',
+  },
+  fullSub: {
+    fontSize: 10, fontWeight: 600, letterSpacing: 1.2,
+    color: '#3f3f46', margin: 0,
+    textTransform: 'uppercase',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+  },
+
+  // ── Compact (post-upload) ──────────────────────────────────────────
+  compactRoot: {
+    position: 'relative',
+    pointerEvents: 'auto',
+  },
+  compactChip: {
+    display: 'flex', alignItems: 'center', gap: 6,
+    background: 'rgba(15,15,18,0.85)',
+    backdropFilter: 'blur(8px)',
+    WebkitBackdropFilter: 'blur(8px)',
+    border: '1px solid #27272a',
+    borderRadius: 999,
+    padding: '5px 10px 5px 8px',
+    color: '#fafafa',
+    cursor: 'pointer',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+    fontSize: 11,
+    transition: 'border-color 120ms ease, background 120ms ease',
+  },
+  compactChipOpen: {
+    border: '1px solid rgba(167,139,250,0.85)', // full shorthand so closing restores compactChip's border cleanly
+    background: 'rgba(35,20,55,0.92)',
+  },
+  compactFlag: {
+    fontSize: 14, lineHeight: 1,
+  },
+  compactCode: {
+    fontWeight: 800, letterSpacing: 1.4,
+  },
+  compactCaret: {
+    fontSize: 9, color: '#a1a1aa',
+  },
+  compactGrid: {
+    position: 'absolute',
+    top: 'calc(100% + 6px)',
+    right: 0,
+    display: 'grid',
+    gridTemplateColumns: 'repeat(2, 1fr)',
+    gap: 6,
+    background: 'rgba(15,15,18,0.94)',
+    backdropFilter: 'blur(10px)',
+    WebkitBackdropFilter: 'blur(10px)',
+    border: '1px solid #27272a',
+    borderRadius: 12,
+    padding: 8,
+    minWidth: 220,
+    boxShadow: '0 14px 40px rgba(0,0,0,0.6)',
+  },
+  compactTile: {
+    display: 'flex', alignItems: 'center', gap: 8,
+    padding: '8px 10px',
+    background: 'rgba(24,24,27,0.6)',
+    border: '1px solid #27272a',
+    borderRadius: 8,
+    color: '#a1a1aa',
+    cursor: 'pointer',
+    fontFamily: 'ui-monospace, SFMono-Regular, Menlo, monospace',
+    fontSize: 11,
+    transition: 'background 120ms ease, border-color 120ms ease, color 120ms ease',
+  },
+  compactTileActive: {
+    background: 'rgba(124,58,237,0.22)',
+    border: '1px solid rgba(167,139,250,0.85)', // full shorthand so deactivating restores compactTile's border cleanly
+    color: '#fafafa',
+  },
+  compactTileFlag: {
+    fontSize: 16, lineHeight: 1,
+  },
+  compactTileLabel: {
+    fontSize: 11, fontWeight: 700,
+    fontFamily: '-apple-system, BlinkMacSystemFont, "SF Pro Text", sans-serif',
+  },
+
+  // Shared "request in flight" affordance.
+  tilePending: {
+    opacity: 0.55, cursor: 'wait',
+  },
+};
diff --git a/dashboard/src/hooks/useEmpireSocket.js b/dashboard/src/hooks/useEmpireSocket.js
index 940903f..5b3793f 100644
--- a/dashboard/src/hooks/useEmpireSocket.js
+++ b/dashboard/src/hooks/useEmpireSocket.js
@@ -59,6 +59,20 @@ export function useEmpireSocket() {
   });
   const voiceStateTimerRef = useRef(null);
   const wsRef = useRef(null);
+  // Debug HUD state — last play_clip event observed per layer.
+  // {tier0, tier1}, each null or {intent, url, mode, fade_ms, muted, emitted_by, ts}.
+  // Mirrors what useAvatarStream consumes for video playback, but kept
+  // separate so a hook-level subscriber can render an overlay without
+  // tapping the player's internal refs. Safe to subscribe twice — both
+  // useEmpireSocket and useAvatarStream listen to the same wsRef and
+  // each handler is a pure read.
+  const [activeClips, setActiveClips] = useState({ tier0: null, tier1: null });
+  // Live target language (ISO code, mirrors backend pipeline_state["active_language"]).
+  // Source of truth is the backend — we seed from GET /api/live/language on
+  // mount and keep it in sync via the `language_changed` WS event broadcast
+  // by POST /api/live/language. The LanguagePicker writes via POST; the
+  // resulting broadcast lands here so multiple open tabs stay coherent.
+  const [activeLanguage, setActiveLanguage] = useState('en');
 
   // Helper: set voice state with a safety auto-clear so a dropped follow-up
   // event can never leave the pill stuck on stage.
@@ -116,6 +130,27 @@ export function useEmpireSocket() {
         case 'status':
           setStatus(msg.status);
           break;
+        case 'play_clip':
+          // Director crossfade event. Stash per-layer for the debug HUD.
+          // useAvatarStream is the actual consumer that drives the
+          // <video> elements; this is a parallel read so we can show
+          // which clip is on screen at any moment without poking into
+          // the player's internals.
+          if (msg.layer === 'tier0' || msg.layer === 'tier1') {
+            setActiveClips(prev => ({
+              ...prev,
+              [msg.layer]: {
+                intent: msg.intent,
+                url: msg.url,
+                mode: msg.mode,
+                fade_ms: msg.fade_ms,
+                muted: msg.muted,
+                emitted_by: msg.emitted_by,
+                ts: msg.ts || Date.now(),
+              },
+            }));
+          }
+          break;
         case 'tts_audio':
           setLatestAudio({ audio: msg.audio, format: msg.format });
           break;
@@ -309,6 +344,12 @@ export function useEmpireSocket() {
             setStatus(msg.on ? 'live' : 'idle');
           }
           break;
+        case 'language_changed':
+          // Server flipped pipeline_state["active_language"] (typically
+          // because some other tab clicked the LanguagePicker). Mirror it
+          // locally so the picker on this tab updates in lockstep.
+          if (msg.lang) setActiveLanguage(msg.lang);
+          break;
         case 'voice_transcript':
           // Fires within ~200ms of push-to-talk release. Drop empty
           // transcripts (no_speech / transcription_failed) — the endpoint
@@ -340,6 +381,32 @@ export function useEmpireSocket() {
     return () => wsRef.current?.close();
   }, [connect]);
 
+  // Seed the active language from the backend once on mount. The WS event
+  // (language_changed) handles every subsequent update, so this fetch only
+  // matters for the initial paint — without it the picker would render
+  // 'en' even if the operator had picked a different language in a prior
+  // session that's still cached in pipeline_state.
+  useEffect(() => {
+    let cancelled = false;
+    (async () => {
+      try {
+        const r = await fetch(
+          `http://${window.location.hostname}:8000/api/live/language`,
+        );
+        if (!r.ok) return;
+        const data = await r.json();
+        if (!cancelled && data?.active_language) {
+          setActiveLanguage(data.active_language);
+        }
+      } catch {
+        // Backend offline / CORS / etc. — keep the default 'en' and let
+        // the user re-pick once the connection comes back; the WS
+        // handler will pick up any later changes.
+      }
+    })();
+    return () => { cancelled = true; };
+  }, []);
+
   // Derive a coarse live stage from backend status when no explicit stage event
   // has updated us recently.
   useEffect(() => {
@@ -380,5 +447,12 @@ export function useEmpireSocket() {
     audioResponse, setAudioResponse, pitchAudio, setPitchAudio,
     sendComment, sendSell,
     wsRef, // exposed so useAvatarStream can attach an extra message listener
+    // Debug HUD surface — last play_clip emit per Director layer.
+    activeClips,
+    // Live-language surface — App reads activeLanguage to drive the
+    // LanguagePicker; setActiveLanguage is the optimistic local writer
+    // (the picker calls it before the WS roundtrip lands so the UI
+    // doesn't stutter).
+    activeLanguage, setActiveLanguage,
   };
 }