diff --git a/.gitignore b/.gitignore
index 741acc764b..daf13572a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -74,6 +74,8 @@ next-env.d.ts
 # Eval results
 eval/whiteboard-layout/results/
 eval/outline-language/results/
+eval/orchestration/results/
+eval/orchestration/results-answering/
 
 # e2e screenshot artifacts
 e2e/screenshots/
diff --git a/components/agent/agent-bar.tsx b/components/agent/agent-bar.tsx
index ade89c7d57..f0b947a230 100644
--- a/components/agent/agent-bar.tsx
+++ b/components/agent/agent-bar.tsx
@@ -20,9 +20,6 @@ import {
   Volume2,
   VolumeX,
   Loader2,
-  MessageSquare,
-  Minus,
-  Plus,
   Search,
 } from 'lucide-react';
 import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip';
@@ -615,8 +612,6 @@ export function AgentBar() {
   const { listAgents } = useAgentRegistry();
   const selectedAgentIds = useSettingsStore((s) => s.selectedAgentIds);
   const setSelectedAgentIds = useSettingsStore((s) => s.setSelectedAgentIds);
-  const maxTurns = useSettingsStore((s) => s.maxTurns);
-  const setMaxTurns = useSettingsStore((s) => s.setMaxTurns);
   const agentMode = useSettingsStore((s) => s.agentMode);
   const setAgentMode = useSettingsStore((s) => s.setAgentMode);
   const ttsProvidersConfig = useSettingsStore((s) => s.ttsProvidersConfig);
@@ -937,57 +932,6 @@ export function AgentBar() {
                   </div>
                 </div>
               )}
-
-              {/* Max turns — compact stepper */}
-              <div className="flex items-center gap-1.5 px-2 py-1 mt-1 border-t border-border/30">
-                <MessageSquare className="size-3 text-muted-foreground/40 shrink-0" />
-                <span className="text-[11px] text-muted-foreground/50 flex-1">
-                  {t('settings.maxTurns')}
-                </span>
-                <div className="flex items-center rounded-full bg-muted/50 h-5 shrink-0">
-                  <button
-                    type="button"
-                    onClick={(e) => {
-                      e.stopPropagation();
-                      const v = Math.max(1, parseInt(maxTurns || '1') - 1);
-                      setMaxTurns(String(v));
-                    }}
-                    className="size-5 flex items-center justify-center text-muted-foreground/60 hover:text-foreground transition-colors rounded-full hover:bg-muted"
-                  >
-                    <Minus className="size-2.5" />
-                  </button>
-                  <input
-                    type="text"
-                    inputMode="numeric"
-                    value={maxTurns}
-                    onChange={(e) => {
-                      const raw = e.target.value.replace(/\D/g, '');
-                      if (!raw) {
-                        setMaxTurns('');
-                        return;
-                      }
-                      const v = Math.min(20, Math.max(1, parseInt(raw)));
-                      setMaxTurns(String(v));
-                    }}
-                    onBlur={() => {
-                      if (!maxTurns || parseInt(maxTurns) < 1) setMaxTurns('1');
-                    }}
-                    onClick={(e) => e.stopPropagation()}
-                    className="w-5 h-5 text-[11px] font-medium tabular-nums text-center bg-transparent outline-none border-none"
-                  />
-                  <button
-                    type="button"
-                    onClick={(e) => {
-                      e.stopPropagation();
-                      const v = Math.min(20, parseInt(maxTurns || '1') + 1);
-                      setMaxTurns(String(v));
-                    }}
-                    className="size-5 flex items-center justify-center text-muted-foreground/60 hover:text-foreground transition-colors rounded-full hover:bg-muted"
-                  >
-                    <Plus className="size-2.5" />
-                  </button>
-                </div>
-              </div>
             </div>
           </motion.div>
         )}
diff --git a/components/chat/session-list.tsx b/components/chat/session-list.tsx
index 526443efad..350dc60d19 100644
--- a/components/chat/session-list.tsx
+++ b/components/chat/session-list.tsx
@@ -3,7 +3,7 @@
 import type { ChatSession, SessionStatus } from '@/lib/types/chat';
 import { cn } from '@/lib/utils';
 import { useI18n } from '@/lib/hooks/use-i18n';
-import { ChevronDown, Circle, CheckCircle, Clock } from 'lucide-react';
+import { ChevronDown, Circle, CheckCircle, Clock, AlertCircle } from 'lucide-react';
 import { motion, AnimatePresence } from 'motion/react';
 import { ChatSessionComponent } from './chat-session';
 
@@ -32,6 +32,8 @@ function getStatusIcon(status: SessionStatus) {
       return <Clock className="size-2.5 text-yellow-500" />;
     case 'completed':
       return <CheckCircle className="size-2.5 text-gray-400" />;
+    case 'error':
+      return <AlertCircle className="size-2.5 text-red-500" />;
     case 'idle':
     default:
       return <Circle className="size-2.5 text-gray-300" />;
diff --git a/components/chat/use-chat-sessions.ts b/components/chat/use-chat-sessions.ts
index 917ba66955..c6911601c5 100644
--- a/components/chat/use-chat-sessions.ts
+++ b/components/chat/use-chat-sessions.ts
@@ -170,6 +170,7 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
         s.id === sessionId
           ? {
               ...s,
+              status: 'error' as SessionStatus,
               updatedAt: now,
               messages: [
                 ...s.messages,
@@ -456,8 +457,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
       controller: AbortController,
       sessionType: SessionType,
     ): Promise<void> => {
-      const settingsState = useSettingsStore.getState();
-
       // Attach full configs for generated (non-default) agents so the server can use them.
       // The server-side registry only has default agents; generated agents exist only client-side.
       const generatedConfigs = requestTemplate.config.agentIds
@@ -469,11 +468,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
         requestTemplate.config.agentConfigs = generatedConfigs;
       }
 
-      const defaultMaxTurns = requestTemplate.config.agentIds.length <= 1 ? 1 : 10;
-      const maxTurns = settingsState.maxTurns
-        ? parseInt(settingsState.maxTurns, 10) || defaultMaxTurns
-        : defaultMaxTurns;
-
       // Per-iteration buffer reference — set in onEvent, used in onIterationEnd
       let currentBuffer: StreamBuffer | null = null;
       // Tracks agent_start messageId so text_delta/action events with a missing
@@ -607,28 +601,40 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
           },
         },
         controller.signal,
-        maxTurns,
       );
 
-      // Handle loop completion (UI-specific)
+      // Handle loop completion (UI-specific). Map each outcome.reason to a
+      // distinct session state — don't conflate error paths with completion.
       if (!controller.signal.aborted) {
-        if (outcome.reason !== 'cue_user') {
-          setSessions((prev) =>
-            prev.map((s) =>
-              s.id === sessionId
-                ? {
-                    ...s,
-                    status: 'completed' as SessionStatus,
-                    updatedAt: Date.now(),
-                  }
-                : s,
-            ),
-          );
-          onStopSessionRef.current?.();
+        switch (outcome.reason) {
+          case 'cue_user':
+            // Session stays active; UI waits for the next user message.
+            break;
+          case 'end':
+            setSessions((prev) =>
+              prev.map((s) =>
+                s.id === sessionId
+                  ? { ...s, status: 'completed' as SessionStatus, updatedAt: Date.now() }
+                  : s,
+              ),
+            );
+            onStopSessionRef.current?.();
+            break;
+          case 'empty_turns':
+            clearLiveSessionAfterError(sessionId, t('chat.error.emptyAgentResponses'));
+            onStopSessionRef.current?.();
+            break;
+          case 'no_done':
+            clearLiveSessionAfterError(sessionId, t('chat.error.streamInterrupted'));
+            onStopSessionRef.current?.();
+            break;
+          case 'aborted':
+            // Already handled elsewhere via abort signal.
+            break;
         }
       }
     },
-    [createBufferForSession],
+    [createBufferForSession, clearLiveSessionAfterError, t],
   );
 
   /**
@@ -646,8 +652,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
       messages: [],
       config: {
         agentIds: ['default-1'],
-        maxTurns: 0, // Not used for runtime — frontend loop manages maxTurns
-        currentTurn: 0,
         defaultAgentId: 'default-1',
       },
       toolCalls: [],
@@ -1070,8 +1074,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
             messages: [userMessage],
             config: {
               agentIds,
-              maxTurns: 0, // Not used for runtime — frontend loop manages maxTurns
-              currentTurn: 0,
               defaultAgentId: agentIds[0],
             },
             toolCalls: [],
@@ -1208,8 +1210,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
         messages: [],
         config: {
           agentIds,
-          maxTurns: 0, // Not used for runtime — frontend loop manages maxTurns
-          currentTurn: 0,
           triggerAgentId: agentId,
         },
         toolCalls: [],
@@ -1370,8 +1370,6 @@ export function useChatSessions(options: UseChatSessionsOptions = {}) {
         messages: [lectureMessage],
         config: {
           agentIds: ['default-1'],
-          maxTurns: 0,
-          currentTurn: 0,
         },
         toolCalls: [],
         pendingToolCalls: [],
diff --git a/components/settings/agent-settings.tsx b/components/settings/agent-settings.tsx
index ad0c9aa8ae..26a21e993a 100644
--- a/components/settings/agent-settings.tsx
+++ b/components/settings/agent-settings.tsx
@@ -1,7 +1,6 @@
 'use client';
 
 import { Label } from '@/components/ui/label';
-import { Input } from '@/components/ui/input';
 import { Checkbox } from '@/components/ui/checkbox';
 import { AlertCircle, User, Users, Sparkles, Info } from 'lucide-react';
 import { cn } from '@/lib/utils';
@@ -20,20 +19,16 @@ interface Agent {
 interface AgentSettingsProps {
   agents: Agent[];
   selectedAgentIds: string[];
-  maxTurns: string;
   agentMode: 'preset' | 'auto';
   onToggleAgent: (agentId: string) => void;
-  onMaxTurnsChange: (value: string) => void;
   onAgentModeChange: (mode: 'preset' | 'auto') => void;
 }
 
 export function AgentSettings({
   agents,
   selectedAgentIds,
-  maxTurns,
   agentMode,
   onToggleAgent,
-  onMaxTurnsChange,
   onAgentModeChange,
 }: AgentSettingsProps) {
   const { t } = useI18n();
@@ -165,22 +160,6 @@ export function AgentSettings({
                 </span>
               )}
             </div>
-
-            {/* Max turns config - only show for multi-agent */}
-            {selectedAgentIds.length > 1 && (
-              <div className="space-y-2 border-l-4 border-purple-500 pl-4">
-                <Label>{t('settings.maxTurns')}</Label>
-                <p className="text-xs text-muted-foreground">{t('settings.maxTurnsDesc')}</p>
-                <Input
-                  type="number"
-                  min="1"
-                  max="20"
-                  value={maxTurns}
-                  onChange={(e) => onMaxTurnsChange(e.target.value)}
-                  className="w-24"
-                />
-              </div>
-            )}
           </>
         ) : (
           <>
diff --git a/eval/orchestration/answering-runner.ts b/eval/orchestration/answering-runner.ts
new file mode 100644
index 0000000000..5f3d85dd6a
--- /dev/null
+++ b/eval/orchestration/answering-runner.ts
@@ -0,0 +1,408 @@
+/**
+ * Director Question-Answering Eval (#598 / #511 follow-up)
+ *
+ * Tests whether the director routes correctly when the conversation contains
+ * an unanswered user question. The bug observed in production: when agents
+ * have drifted off-topic — whether the user has expressed frustration yet
+ * or not — the director keeps picking peer agents for "variety" instead of
+ * routing to the teacher to actually answer the literal question.
+ *
+ * Scenarios cover both shapes:
+ *   - first-turn drift, no frustration yet (the root case)
+ *   - escalated frustration after multiple complaints (the recovery case)
+ *
+ * Per-decision classification (deterministic, no LLM judge):
+ *   - TEACHER    → ✓ correct (teacher answers, or asks a clarifying question
+ *                  when the user's message is too vague)
+ *   - USER       → ✗ wrong (cue_user makes no agent speak — the user faces
+ *                  dead air; the teacher should ask the clarifying question)
+ *   - OTHER_AGENT → ✗ wrong (peer-agent "variety" routing)
+ *   - END        → ✗ wrong
+ *
+ * A/B:
+ *   - baseline  : current director template with rule 13 stripped
+ *   - with_rule : current director template as-shipped (rule 13 in place)
+ *
+ * Pass criterion: with_rule.correctRate ≥ EVAL_PASS_THRESHOLD (default 0.7).
+ * The pre-vs-post Δ is reported as informational only — scenarios where the
+ * baseline already routes correctly shouldn't fail just because there is no
+ * room to lift.
+ *
+ * Required env:
+ *   EVAL_DIRECTOR_MODEL (falls back to DEFAULT_MODEL when unset)
+ *
+ * Optional env:
+ *   EVAL_SAMPLES        Samples per (scenario, variant). Default 5.
+ *   EVAL_PASS_THRESHOLD Min with_rule correct rate per scenario. Default 0.7.
+ *   EVAL_SCENARIO       Filter to a single scenario by case_id.
+ *
+ * Output: eval/orchestration/results-answering/<model>/<timestamp>/report.md
+ */
+
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import { callLLM } from '@/lib/ai/llm';
+import { parseDirectorDecision } from '@/lib/orchestration/director-prompt';
+import {
+  summarizeConversation,
+  type OpenAIMessage,
+} from '@/lib/orchestration/summarizers/conversation-summary';
+import {
+  processSnippets,
+  processConditionalBlocks,
+  interpolateVariables,
+} from '@/lib/prompts/loader';
+import { resolveEvalModel } from '../shared/resolve-model';
+import { createRunDir } from '../shared/run-dir';
+import type { AgentTurnSummary } from '@/lib/orchestration/types';
+import type { ScenarioAgent } from './types';
+
+const OUTPUT_DIR = 'eval/orchestration/results-answering';
+
+// ==================== Types ====================
+
+interface AnsweringScenario {
+  case_id: string; // matched against EVAL_SCENARIO and used as the report row label
+  description: string; // printed under the scenario heading in the report detail
+  agents: ScenarioAgent[]; // rendered into the prompt's agentList, one bullet each
+  teacherAgentId: string; // the only agent id that classify() counts as TEACHER
+  messages: OpenAIMessage[]; // passed to summarizeConversation() for the prompt
+  agentResponses: AgentTurnSummary[]; // rendered into respondedList ('None yet.' if empty)
+  turnCount: number; // the prompt receives turnCount + 1 as turnCountPlusOne
+  whiteboardOpen?: boolean; // truthy selects the OPEN whiteboardOpenText, else CLOSED
+}
+
+type Variant = 'baseline' | 'with_rule';
+type DecisionClass = 'USER' | 'TEACHER' | 'OTHER_AGENT' | 'END' | 'ERROR';
+
+interface SampleResult {
+  variant: Variant;
+  raw: string; // model reply text; '' when the call threw
+  classification: DecisionClass;
+  rawAgentId: string | null; // parsed agent id ('USER' included); null for END and ERROR
+  error?: string; // message of the caught error; set only when the call threw
+}
+
+interface ScenarioResult {
+  case_id: string;
+  description: string;
+  samples: number; // requested samples per variant, not the number that succeeded
+  baseline: { samples: SampleResult[]; rates: Record<DecisionClass, number>; correctRate: number };
+  withRule: { samples: SampleResult[]; rates: Record<DecisionClass, number>; correctRate: number };
+  delta: number; // withRule.correctRate - baseline.correctRate (informational)
+  passes: boolean; // withRule.correctRate >= threshold
+}
+
+// ==================== Prompt building ====================
+
+function readDirectorTemplate(): string {
+  const segments = ['lib', 'prompts', 'templates', 'director', 'system.md']; // under process.cwd()
+  return fs.readFileSync(path.join(process.cwd(), ...segments), 'utf-8').trim();
+}
+
+/**
+ * Produce the pre-rule baseline for the A/B comparison by deleting rule 13
+ * (added directly to director/system.md) together with its continuation
+ * lines from the current template. Throws when rule 13 cannot be located.
+ */
+function withoutAnsweringRule(template: string): string {
+  // Anchor on the rule number, not the frequently reworded heading; the match
+  // runs lazily up to the blank line that precedes the next `# ` section.
+  const rule13 = /^13\. \*\*[\s\S]*?(?=\n\n# )/m;
+  if (!rule13.test(template)) {
+    throw new Error(
+      'answering-runner: rule 13 not found in director template; eval baseline cannot be constructed',
+    );
+  }
+  return template.replace(rule13, '').replace(/\n{3,}/g, '\n\n');
+}
+
+// Fill a director template with the scenario's agent roster, the recorded
+// agent responses and the conversation summary, then run it through the
+// prompt loader helpers.
+function buildPromptFromTemplate(
+  template: string,
+  scenario: AnsweringScenario,
+  conversationSummary: string,
+): string {
+  // One bullet per agent the director may choose from.
+  const describeAgent = (a: ScenarioAgent): string =>
+    `- id: "${a.id}", name: "${a.name}", role: ${a.role}, priority: ${a.priority}`;
+  const agentList = scenario.agents.map(describeAgent).join('\n');
+
+  // One bullet per recorded agent response, or a placeholder when there are none.
+  const respondedLines = scenario.agentResponses.map(
+    (r) => `- ${r.agentName} (${r.agentId}): "${r.contentPreview}" [${r.actionCount} actions]`,
+  );
+  const respondedList = respondedLines.length > 0 ? respondedLines.join('\n') : 'None yet.';
+
+  const whiteboardOpenText = scenario.whiteboardOpen
+    ? 'OPEN (slide canvas is hidden — spotlight/laser will not work)'
+    : 'CLOSED (slide canvas is visible)';
+
+  // The discussion, whiteboard and student-profile sections are left empty.
+  const vars: Record<string, unknown> = {
+    agentList,
+    respondedList,
+    conversationSummary,
+    discussionSection: '',
+    whiteboardSection: '',
+    studentProfileSection: '',
+    rule1:
+      "1. The teacher (role: teacher, highest priority) should usually speak first to address the user's question or topic.",
+    turnCountPlusOne: scenario.turnCount + 1,
+    whiteboardOpenText,
+  };
+
+  // Call order: processSnippets, then processConditionalBlocks, then interpolateVariables.
+  return interpolateVariables(processConditionalBlocks(processSnippets(template), vars), vars);
+}
+
+function buildVariants(scenario: AnsweringScenario): { baseline: string; with_rule: string } {
+  const shipped = readDirectorTemplate();
+  // One shared summary, so the two prompts differ only in the template text.
+  const summary = summarizeConversation(scenario.messages);
+  const preRule = withoutAnsweringRule(shipped);
+  const baseline = buildPromptFromTemplate(preRule, scenario, summary);
+  return { baseline, with_rule: buildPromptFromTemplate(shipped, scenario, summary) };
+}
+
+// ==================== Classifier ====================
+
+function classify(
+  raw: string,
+  scenario: AnsweringScenario,
+): {
+  classification: DecisionClass;
+  rawAgentId: string | null;
+} {
+  // Bucket one raw director reply; a reply that names no agent is treated as END.
+  const { shouldEnd, nextAgentId } = parseDirectorDecision(raw);
+  if (shouldEnd || !nextAgentId) return { classification: 'END', rawAgentId: null };
+  // Order matters: the literal 'USER' id is checked before the teacher id.
+  const classification: DecisionClass =
+    nextAgentId === 'USER'
+      ? 'USER'
+      : nextAgentId === scenario.teacherAgentId
+        ? 'TEACHER'
+        : 'OTHER_AGENT';
+  return { classification, rawAgentId: nextAgentId };
+}
+
+function emptyRates(): Record<DecisionClass, number> {
+  return { USER: 0, TEACHER: 0, OTHER_AGENT: 0, END: 0, ERROR: 0 };
+}
+
+/**
+ * Rates for one variant: each non-error class over the usable samples, ERROR
+ * over all samples. correctRate is TEACHER only (USER is dead air; peer/END are wrong).
+ */
+function computeRates(samples: SampleResult[]): {
+  rates: Record<DecisionClass, number>;
+  correctRate: number;
+} {
+  const rates = emptyRates();
+  // An Error with an empty message leaves `error` falsy, so check the class too.
+  const usable = samples.filter((s) => !s.error && s.classification !== 'ERROR');
+  for (const s of usable) rates[s.classification]++;
+  for (const k of Object.keys(rates) as DecisionClass[]) rates[k] /= usable.length || 1;
+  // `|| 1` keeps an empty batch (e.g. EVAL_SAMPLES=0) at 0 instead of NaN.
+  rates.ERROR = (samples.length - usable.length) / (samples.length || 1);
+  return { rates, correctRate: rates.TEACHER };
+}
+
+// ==================== Sampling ====================
+
+/**
+ * Draw `samples` independent director decisions for one prompt variant.
+ */
+async function sampleVariant(
+  scenario: AnsweringScenario,
+  variant: Variant,
+  systemPrompt: string,
+  model: Awaited<ReturnType<typeof resolveEvalModel>>['model'],
+  samples: number,
+): Promise<SampleResult[]> {
+  // One director call. A failure is captured as an ERROR sample so a single
+  // bad request cannot reject the whole batch.
+  const drawOne = async (): Promise<SampleResult> => {
+    try {
+      const { text } = await callLLM(
+        {
+          model,
+          messages: [
+            { role: 'system', content: systemPrompt },
+            { role: 'user', content: 'Decide which agent should speak next.' },
+          ],
+        },
+        'eval-orchestration-answering',
+      );
+      return { variant, raw: text, ...classify(text, scenario) };
+    } catch (err) {
+      const error = err instanceof Error ? err.message : String(err);
+      return { variant, raw: '', classification: 'ERROR', rawAgentId: null, error };
+    }
+  };
+
+  // Start every draw up front and wait for all of them together.
+  const pending = Array.from({ length: samples }, () => drawOne());
+  return Promise.all(pending);
+}
+
+// ==================== Reporting ====================
+
+function pct(x: number): string {
+  return Math.round(x * 100) + '%'; // fraction to whole-number percent, e.g. 0.5 -> "50%"
+}
+
+/**
+ * Render the Markdown report for a finished run and write it to
+ * `<runDir>/report.md`. Returns the path of the written file.
+ */
+function writeReport(
+  runDir: string,
+  results: ScenarioResult[],
+  modelStr: string,
+  samples: number,
+  threshold: number,
+): string {
+  const lines: string[] = [];
+  const overallPass = results.length > 0 && results.every((r) => r.passes); // empty run is FAIL
+  // `|| 1` keeps the aggregate at 0% instead of NaN% when no scenario ran.
+  const mean = (pick: (r: ScenarioResult) => number): number =>
+    results.reduce((acc, r) => acc + pick(r), 0) / (results.length || 1);
+  const meanBaseline = mean((r) => r.baseline.correctRate);
+  const meanWithRule = mean((r) => r.withRule.correctRate);
+  // One bullet per sample; the raw agent id is shown only for OTHER_AGENT.
+  const pushSamples = (title: string, list: SampleResult[]): void => {
+    lines.push(`<details><summary>${title} samples</summary>`, ``);
+    for (const s of list) {
+      const agent = s.rawAgentId && s.classification === 'OTHER_AGENT' ? ` (${s.rawAgentId})` : '';
+      lines.push(`- ${s.error ? `ERROR: ${s.error}` : `${s.classification}${agent}`}`);
+    }
+    lines.push(``, `</details>`, ``);
+  };
+
+  lines.push(`# Director Question-Answering Eval`, ``);
+  lines.push(`- **Date**: ${new Date().toISOString()}`);
+  lines.push(`- **Model**: ${modelStr}`);
+  lines.push(`- **Samples per (scenario, variant)**: ${samples}`);
+  lines.push(`- **with_rule correct-rate threshold**: ${pct(threshold)}`);
+  lines.push(`- **Δ (pre vs post)**: informational — PASS depends only on with_rule rate`);
+  lines.push(``);
+  lines.push(`## Aggregate`);
+  lines.push(``);
+  lines.push(`| Variant | Mean correct rate (TEACHER) |`);
+  lines.push(`|---|---|`);
+  lines.push(`| baseline | ${pct(meanBaseline)} |`);
+  lines.push(`| with_rule | ${pct(meanWithRule)} |`);
+  lines.push(`| Δ | ${pct(meanWithRule - meanBaseline)} |`);
+  lines.push(``);
+  lines.push(`Overall verdict: **${overallPass ? 'PASS' : 'FAIL'}**`);
+  lines.push(``);
+
+  lines.push(`## Per scenario`);
+  lines.push(``);
+  lines.push(
+    `| # | Scenario | Baseline USER% TEACHER% OTHER% END% | with_rule USER% TEACHER% OTHER% END% | Δ correct | pass? |`,
+  );
+  lines.push(`|---|---|---|---|---|---|`);
+  const cell = (q: Record<DecisionClass, number>): string =>
+    `${pct(q.USER)}/${pct(q.TEACHER)}/${pct(q.OTHER_AGENT)}/${pct(q.END)}`;
+  results.forEach((r, i) => {
+    lines.push(
+      `| ${i + 1} | ${r.case_id} | ${cell(r.baseline.rates)} | ${cell(r.withRule.rates)} | ${pct(r.delta)} | ${r.passes ? '✓' : '✗'} |`,
+    );
+  });
+  lines.push(``);
+
+  lines.push(`## Detail`);
+  for (const r of results) {
+    lines.push(``, `### ${r.case_id} ${r.passes ? '✓' : '✗'}`, ``);
+    lines.push(`- ${r.description}`);
+    lines.push(
+      `- Baseline correct: ${pct(r.baseline.correctRate)}; with_rule correct: ${pct(r.withRule.correctRate)}; Δ: ${pct(r.delta)}`,
+    );
+    lines.push(``);
+    pushSamples('baseline', r.baseline.samples);
+    pushSamples('with_rule', r.withRule.samples);
+  }
+
+  const reportPath = path.join(runDir, 'report.md');
+  fs.writeFileSync(reportPath, lines.join('\n'));
+  return reportPath;
+}
+
+// ==================== Main ====================
+
+function getCurrentDir(): string {
+  // CommonJS exposes __dirname; otherwise derive the directory from import.meta.url.
+  if (typeof __dirname !== 'undefined') return __dirname;
+  return path.dirname(fileURLToPath(import.meta.url));
+}
+
+function loadScenarios(): AnsweringScenario[] {
+  const file = path.join(getCurrentDir(), 'scenarios/answering.json');
+  const all = JSON.parse(fs.readFileSync(file, 'utf-8')) as AnsweringScenario[];
+  const only = process.env.EVAL_SCENARIO; // optional case_id filter; unset or '' keeps all
+  return !only ? all : all.filter((s) => s.case_id === only);
+}
+
+/** CLI entry: sample both variants for every scenario, write the report, exit 0 only on PASS. */
+async function main() {
+  const modelStr = process.env.EVAL_DIRECTOR_MODEL || process.env.DEFAULT_MODEL;
+  if (!modelStr) {
+    console.error(
+      'Error: EVAL_DIRECTOR_MODEL must be set. Example: EVAL_DIRECTOR_MODEL=google:gemini-3-flash-preview',
+    );
+    process.exit(1);
+  }
+  const samples = Number(process.env.EVAL_SAMPLES || '5');
+  const threshold = Number(process.env.EVAL_PASS_THRESHOLD || '0.7');
+  console.log('=== Director Question-Answering Eval ===');
+  console.log(`Model: ${modelStr} | Samples/variant: ${samples} | pass threshold: ${threshold}`);
+
+  const { model } = await resolveEvalModel('EVAL_DIRECTOR_MODEL', process.env.DEFAULT_MODEL);
+  const scenarios = loadScenarios();
+  console.log(`Loaded ${scenarios.length} scenario(s)`);
+  // results.every() is vacuously true when empty, so zero scenarios must fail, not PASS.
+  if (scenarios.length === 0) throw new Error('no scenarios to run; check EVAL_SCENARIO');
+  const runDir = createRunDir(OUTPUT_DIR, modelStr);
+  console.log(`Output: ${runDir}`);
+
+  const results: ScenarioResult[] = [];
+  for (const sc of scenarios) {
+    process.stdout.write(`  - ${sc.case_id} ... `);
+    const variants = buildVariants(sc);
+    const [bs, ws] = await Promise.all([
+      sampleVariant(sc, 'baseline', variants.baseline, model, samples),
+      sampleVariant(sc, 'with_rule', variants.with_rule, model, samples),
+    ]);
+    const [bAgg, wAgg] = [computeRates(bs), computeRates(ws)];
+    const lift = wAgg.correctRate - bAgg.correctRate;
+    const passes = wAgg.correctRate >= threshold;
+    results.push({
+      case_id: sc.case_id,
+      description: sc.description,
+      samples,
+      baseline: { samples: bs, ...bAgg },
+      withRule: { samples: ws, ...wAgg },
+      delta: lift,
+      passes,
+    });
+    console.log(
+      `baseline=${pct(bAgg.correctRate)} with_rule=${pct(wAgg.correctRate)} Δ=${pct(lift)} ${passes ? 'PASS' : 'FAIL'}`,
+    );
+  }
+
+  console.log(`\nReport: ${writeReport(runDir, results, modelStr, samples, threshold)}`);
+  const overallPass = results.every((r) => r.passes);
+  console.log(`Verdict: ${overallPass ? 'PASS' : 'FAIL'}`);
+  process.exit(overallPass ? 0 : 1);
+}
+
+main().catch((err) => {
+  console.error('Fatal error:', err);
+  process.exit(1);
+});
diff --git a/eval/orchestration/judge.ts b/eval/orchestration/judge.ts
new file mode 100644
index 0000000000..db7527b4d0
--- /dev/null
+++ b/eval/orchestration/judge.ts
@@ -0,0 +1,35 @@
+/**
+ * Deterministic verdict for premature-END scenarios.
+ *
+ * The bug we are guarding against is "director picks END while a student
+ * question is unresolved". For each sample, the only thing that matters is
+ * whether the director chose END. No LLM-as-judge here — END/not-END is
+ * binary and reading parseDirectorDecision is sufficient.
+ */
+
+import { parseDirectorDecision } from '@/lib/orchestration/director-prompt';
+
+export interface ParsedSample {
+  decision: 'END' | 'USER' | string; // 'END' or the parsed agent id; the union widens to string
+  isEnd: boolean; // true when classifyDecision reports END
+}
+
+export function classifyDecision(raw: string): ParsedSample {
+  // Reduce one raw director reply to its chosen target. A reply that names no
+  // agent is counted as END, same as an explicit end decision.
+  const { shouldEnd, nextAgentId } = parseDirectorDecision(raw);
+  if (shouldEnd || !nextAgentId) return { decision: 'END', isEnd: true };
+  return { decision: nextAgentId, isEnd: false };
+}
+
+/**
+ * Fraction of usable samples whose decision was END. A sample with a truthy
+ * `error` is skipped so provider failures (e.g. 'Forbidden') are not counted
+ * as END behavior. Yields 0 when nothing usable remains.
+ */
+export function endRate(samples: { isEnd: boolean; error?: string }[]): number {
+  const kept = samples.filter((s) => !s.error);
+  // Count END decisions among the kept samples only.
+  const ended = kept.reduce((n, s) => n + (s.isEnd ? 1 : 0), 0);
+  return kept.length > 0 ? ended / kept.length : 0;
+}
diff --git a/eval/orchestration/prompt-variants.ts b/eval/orchestration/prompt-variants.ts
new file mode 100644
index 0000000000..90b07175d8
--- /dev/null
+++ b/eval/orchestration/prompt-variants.ts
@@ -0,0 +1,182 @@
+/**
+ * Build director system prompts for both the "post-fix" (current main) and
+ * "pre-fix" (rules 10/11/12 removed) variants, so the eval can A/B them on
+ * the same conversation context.
+ *
+ * Rules 10/11/12 are the prompt-layer guardrails added by #554. The pre-fix variant mimics
+ * main^ by dropping them from the # Rules section and using the pre-#554 summary labels.
+ *
+ * We avoid the public `buildDirectorPrompt()` because it always loads the
+ * current template. Here we read the template directly, optionally edit it,
+ * then run the same processSnippets → processConditionalBlocks → interpolateVariables pipeline.
+ */
+
+import fs from 'fs';
+import path from 'path';
+import {
+  processSnippets,
+  processConditionalBlocks,
+  interpolateVariables,
+} from '@/lib/prompts/loader';
+import type { OpenAIMessage } from '@/lib/orchestration/summarizers/conversation-summary';
+import { summarizeConversation } from '@/lib/orchestration/summarizers/conversation-summary';
+import type { ScenarioAgent } from './types';
+import type { AgentTurnSummary } from '@/lib/orchestration/types';
+
+/** Rule numbers introduced by #554; stripFixRules removes exactly these rule lines for the pre-fix variant. */
+const FIX_RULE_NUMBERS = [10, 11, 12] as const;
+
+function readDirectorTemplate(): string { // text of the director system prompt template, trimmed
+  const segments = ['lib', 'prompts', 'templates', 'director', 'system.md'];
+  return fs.readFileSync(path.join(process.cwd(), ...segments), 'utf-8').trim(); // cwd-relative
+}
+
+/**
+ * Remove the lines for rules 10/11/12 from `template`. A rule line is any
+ * line that starts at column 0 with `<number>. ` (each rule is a single line
+ * in the current template). Only lines whose number is listed in
+ * FIX_RULE_NUMBERS are dropped; all other lines keep their original order.
+ * A missing rule is an error so a template rewrite forces us to revisit this eval.
+ *
+ * @throws Error naming the first FIX_RULE_NUMBERS entry that matched no line.
+ */
+export function stripFixRules(template: string): string {
+  const fixNumbers: readonly number[] = FIX_RULE_NUMBERS;
+  const seen = new Set<number>();
+  const kept = template.split('\n').filter((line) => {
+    const match = /^(\d+)\.\s/.exec(line);
+    // Lines without a leading rule number map to NaN, which is never listed.
+    const ruleNo = match ? Number(match[1]) : NaN;
+    if (!fixNumbers.includes(ruleNo)) return true;
+    seen.add(ruleNo);
+    return false;
+  });
+
+  // Report the first expected rule (in FIX_RULE_NUMBERS order) that was not found.
+  const missing = fixNumbers.find((n) => !seen.has(n));
+  if (missing !== undefined) {
+    throw new Error(
+      `prompt-variants: expected rule ${missing} to exist in director/system.md; template may have been rewritten — update FIX_RULE_NUMBERS or this eval.`,
+    );
+  }
+  return kept.join('\n');
+}
+
+export interface BuildArgs { // inputs shared by both prompt variants
+  agents: ScenarioAgent[]; // listed in the prompt as id / name / role / priority
+  messages: OpenAIMessage[]; // conversation history; summarized once per variant
+  agentResponses: AgentTurnSummary[]; // agents that already spoke; rendered as the responded list
+  turnCount: number; // interpolated as turnCount + 1
+  discussionContext?: { topic: string; prompt?: string } | null; // truthy => Discussion Mode section and discussion rule 1
+  triggerAgentId?: string | null; // named as the discussion initiator when set
+  userProfile?: { nickname?: string; bio?: string }; // adds a Student Profile section if nickname or bio is set
+  whiteboardOpen?: boolean; // selects the OPEN vs CLOSED whiteboard text
+}
+
+/**
+ * Pre-#554 summarizeConversation: every role:'user' message is labelled
+ * [User] and every role:'assistant' message [Assistant] (anything else
+ * [System]), with no [senderName]: prefix stripping. The pre-fix variant uses
+ * it so the eval A/B reflects both halves of #554 (the role-aware summary AND
+ * the new prompt rules), not just the rules.
+ */
+function summarizeConversationPreFix(
+  messages: OpenAIMessage[],
+  maxMessages = 10,
+  maxContentLength = 200,
+): string {
+  if (messages.length === 0) return 'No conversation history yet.';
+  const rendered: string[] = [];
+  for (const { role, content } of messages.slice(-maxMessages)) {
+    let label = 'System';
+    if (role === 'user') label = 'User';
+    else if (role === 'assistant') label = 'Assistant';
+    // Long messages are cut to maxContentLength characters and marked with '...'.
+    const tooLong = content.length > maxContentLength;
+    rendered.push(`[${label}] ${tooLong ? content.slice(0, maxContentLength) + '...' : content}`);
+  }
+  return rendered.join('\n');
+}
+
+/**
+ * Mirrors lib/orchestration/director-prompt.ts `buildDirectorPrompt()` shape
+ * but renders the given `template` (e.g. a pre-stripped one) with the supplied
+ * `conversationSummary`. Keep in sync with that file — if you change variable names there, change them here.
+ */
+function buildPromptFromTemplate(
+  template: string,
+  args: BuildArgs,
+  conversationSummary: string,
+): string {
+  const {
+    agents,
+    agentResponses,
+    turnCount,
+    discussionContext,
+    triggerAgentId,
+    userProfile,
+    whiteboardOpen,
+  } = args;
+
+  const agentList = agents
+    .map((a) => `- id: "${a.id}", name: "${a.name}", role: ${a.role}, priority: ${a.priority}`)
+    .join('\n');
+
+  const respondedList =
+    agentResponses.length > 0
+      ? agentResponses
+          .map(
+            (r) =>
+              `- ${r.agentName} (${r.agentId}): "${r.contentPreview}" [${r.actionCount} actions]`,
+          )
+          .join('\n')
+      : 'None yet.';
+
+  const isDiscussion = !!discussionContext; // any truthy discussionContext switches to discussion mode
+  const discussionSection = isDiscussion
+    ? `\n# Discussion Mode\nTopic: "${discussionContext!.topic}"${discussionContext!.prompt ? `\nPrompt: "${discussionContext!.prompt}"` : ''}${triggerAgentId ? `\nInitiator: "${triggerAgentId}"` : ''}\nThis is a student-initiated discussion, not a Q&A session.\n`
+    : '';
+
+  const rule1 = isDiscussion
+    ? `1. The discussion initiator${triggerAgentId ? ` ("${triggerAgentId}")` : ''} should speak first to kick off the topic. Then the teacher responds to guide the discussion. After that, other students may add their perspectives.`
+    : "1. The teacher (role: teacher, highest priority) should usually speak first to address the user's question or topic.";
+
+  const studentProfileSection =
+    userProfile?.nickname || userProfile?.bio
+      ? `\n# Student Profile\nStudent name: ${userProfile.nickname || 'Unknown'}\n${userProfile.bio ? `Background: ${userProfile.bio}` : ''}\n`
+      : '';
+
+  const vars: Record<string, unknown> = {
+    agentList,
+    respondedList,
+    conversationSummary,
+    discussionSection,
+    whiteboardSection: '', // always empty in this eval
+    studentProfileSection,
+    rule1,
+    turnCountPlusOne: turnCount + 1,
+    whiteboardOpenText: whiteboardOpen
+      ? 'OPEN (slide canvas is hidden — spotlight/laser will not work)'
+      : 'CLOSED (slide canvas is visible)',
+  };
+
+  const withSnippets = processSnippets(template); // stage 1: snippets
+  const withConditionals = processConditionalBlocks(withSnippets, vars); // stage 2: conditional blocks
+  return interpolateVariables(withConditionals, vars); // stage 3: variable interpolation
+}
+
+/**
+ * Render the director prompt twice from the same args. `postFix` is the
+ * template as read from disk with the current summarizeConversation output;
+ * `preFix` uses the old [User]/[Assistant] summary AND the template without
+ * rules 10/11/12 — together the full state of main^ relative to #554.
+ */
+export function buildVariants(args: BuildArgs): { preFix: string; postFix: string } {
+  const currentTemplate = readDirectorTemplate();
+  const strippedTemplate = stripFixRules(currentTemplate);
+  const currentSummary = summarizeConversation(args.messages);
+  const legacySummary = summarizeConversationPreFix(args.messages);
+  const preFix = buildPromptFromTemplate(strippedTemplate, args, legacySummary);
+  const postFix = buildPromptFromTemplate(currentTemplate, args, currentSummary);
+  return { preFix, postFix };
+}
diff --git a/eval/orchestration/reporter.ts b/eval/orchestration/reporter.ts
new file mode 100644
index 0000000000..5365c6340b
--- /dev/null
+++ b/eval/orchestration/reporter.ts
@@ -0,0 +1,88 @@
+import { writeFileSync } from 'fs';
+import { join } from 'path';
+import { renderHeader, renderSummaryTable } from '../shared/markdown-report';
+import type { EvalReport } from './types';
+
+function pct(rate: number): string {
+  return Math.round(rate * 100) + '%'; // fraction -> whole-number percent, e.g. 0.25 -> "25%"
+}
+
+function countErrors(samples: { error?: string }[]): number {
+  return samples.reduce((total, { error }) => total + (error ? 1 : 0), 0); // truthy `error` only
+}
+
+/**
+ * Write `report.md` summarising pre-fix vs post-fix END rates per scenario.
+ * Returns the report path: `runDir` joined with 'report.md' (absolute only if `runDir` is).
+ */
+export function writeReport(runDir: string, report: EvalReport): string {
+  const lines: string[] = [];
+  lines.push(
+    ...renderHeader({
+      title: 'Director Premature-END Regression Eval',
+      timestamp: new Date().toISOString(),
+      model: report.model,
+      extra: {
+        'Samples per variant': report.samplesPerVariant,
+        'Post-fix END threshold (regression guard)': pct(report.postFixEndThreshold),
+        'Discrimination threshold (Δ END-rate, informational)': pct(report.thresholdDelta),
+        Method:
+          'A/B director prompt + summary: post-fix = current main; pre-fix = pre-#554 [User]/[Assistant] summary labels AND system.md without rules 10/11/12',
+        'Post-fix regression guard (must hold)': report.allPostFixPass ? 'PASS' : 'FAIL',
+        'Any scenario discriminates? (informational)': report.anyDiscriminates ? 'YES' : 'NO',
+      },
+    }),
+  );
+
+  lines.push(`## Detail`, ``);
+  for (const r of report.results) {
+    const pass = r.postFixPasses ? 'PASS' : '**FAIL**';
+    const disc = r.discriminates ? ' (Δ ≥ threshold)' : '';
+    lines.push(`### ${pass}${disc} ${r.case_id}`, ``);
+    lines.push(`- **Description**: ${r.description}`);
+    const preErr = countErrors(r.preFix.samples);
+    const postErr = countErrors(r.postFix.samples);
+    lines.push(`- **Samples per variant**: ${r.samples} (rates exclude errored samples)`);
+    lines.push(
+      `- **Pre-fix END rate**: ${pct(r.preFix.endRate)}${preErr ? ` — ${preErr} error(s)` : ''}`,
+    );
+    lines.push(
+      `- **Post-fix END rate**: ${pct(r.postFix.endRate)}${postErr ? ` — ${postErr} error(s)` : ''}`,
+    );
+    lines.push(`- **Δ (pre − post)**: ${pct(r.delta)}`);
+    lines.push(``);
+    lines.push(`<details><summary>Pre-fix raw decisions</summary>`, ``);
+    for (const s of r.preFix.samples) {
+      const label = s.error ? `ERROR: ${s.error}` : s.isEnd ? '**END**' : s.decision; // error wins over END, END over the agent id
+      lines.push(`- ${label}`);
+    }
+    lines.push(``, `</details>`, ``);
+    lines.push(`<details><summary>Post-fix raw decisions</summary>`, ``);
+    for (const s of r.postFix.samples) {
+      const label = s.error ? `ERROR: ${s.error}` : s.isEnd ? '**END**' : s.decision; // same labelling as the pre-fix list
+      lines.push(`- ${label}`);
+    }
+    lines.push(``, `</details>`, ``);
+  }
+
+  lines.push(`## Summary`, ``);
+  const rows: string[][] = report.results.map((r, i) => [
+    String(i + 1), // 1-based row number
+    r.case_id,
+    pct(r.preFix.endRate),
+    pct(r.postFix.endRate),
+    pct(r.delta),
+    r.postFixPasses ? 'PASS' : 'FAIL',
+    r.discriminates ? 'YES' : 'no',
+  ]);
+  lines.push(
+    ...renderSummaryTable(
+      ['#', 'Scenario', 'Pre-fix END', 'Post-fix END', 'Δ', 'Regression guard', 'Discriminates'],
+      rows,
+    ),
+  );
+
+  const reportPath = join(runDir, 'report.md');
+  writeFileSync(reportPath, lines.join('\n')); // synchronous write of the whole report
+  return reportPath;
+}
diff --git a/eval/orchestration/runner.ts b/eval/orchestration/runner.ts
new file mode 100644
index 0000000000..8b79d8a841
--- /dev/null
+++ b/eval/orchestration/runner.ts
@@ -0,0 +1,193 @@
+/**
+ * Orchestration Premature-END Regression Eval
+ *
+ * For each scenario, builds the director system prompt twice:
+ *   - "pre-fix"  : current director/system.md with rules 10/11/12 removed, plus the pre-#554 [User]/[Assistant] summary labels
+ *   - "post-fix" : current director/system.md as-shipped
+ * Calls the LLM N times per variant, parses each decision, and reports the
+ * END rate for both. A scenario "discriminates" when (pre − post) ≥ delta.
+ *
+ * Required env:
+ *   EVAL_DIRECTOR_MODEL  Model under test (or DEFAULT_MODEL fallback)
+ *
+ * Optional env:
+ *   EVAL_SAMPLES         Samples per (scenario, variant). Default 5.
+ *   EVAL_DELTA           Discrimination threshold for pre-vs-post Δ (0..1). Default 0.3.
+ *   EVAL_END_THRESHOLD   Max acceptable post-fix END rate per scenario (0..1). Default 0.2.
+ *   EVAL_SCENARIO        Filter to a single scenario by case_id.
+ *
+ * Usage:
+ *   EVAL_DIRECTOR_MODEL=openai:gpt-4.1-mini pnpm eval:orchestration
+ *
+ * Output: eval/orchestration/results/<model>/<timestamp>/report.md
+ *
+ * Exit code:
+ *   0 — every scenario's post-fix END rate is at or below EVAL_END_THRESHOLD
+ *       (the regression guard holds for this model)
+ *   1 — some scenario's post-fix END rate exceeded the threshold
+ *       (potential regression of #554's premature-END fix)
+ */
+
+import { readFileSync } from 'fs';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { callLLM } from '@/lib/ai/llm';
+import { resolveEvalModel } from '../shared/resolve-model';
+import { createRunDir } from '../shared/run-dir';
+import { classifyDecision, endRate } from './judge';
+import { buildVariants } from './prompt-variants';
+import { writeReport } from './reporter';
+import type { EvalReport, PromptVariant, SampleResult, Scenario, ScenarioResult } from './types';
+
+const OUTPUT_DIR = 'eval/orchestration/results';
+
+function getCurrentDir(): string { // directory of this module, with or without a __dirname global
+  return typeof __dirname === 'undefined' ? dirname(fileURLToPath(import.meta.url)) : __dirname;
+}
+
+function loadScenarios(): Scenario[] {
+  // Reads the JSON next to this module; EVAL_SCENARIO (if set) keeps only the matching case_id.
+  const raw = readFileSync(join(getCurrentDir(), 'scenarios/premature-end.json'), 'utf-8');
+  const only = process.env.EVAL_SCENARIO;
+  return (JSON.parse(raw) as Scenario[]).filter((s) => !only || s.case_id === only);
+}
+
+/** Returns EVAL_DIRECTOR_MODEL, else DEFAULT_MODEL; prints a usage error and exits 1 when neither is set. */
+function requireModelEnv(): string {
+  const { EVAL_DIRECTOR_MODEL, DEFAULT_MODEL } = process.env;
+  const modelStr = EVAL_DIRECTOR_MODEL || DEFAULT_MODEL;
+  if (modelStr) return modelStr;
+  console.error(
+    'Error: EVAL_DIRECTOR_MODEL (or DEFAULT_MODEL) must be set. Example: EVAL_DIRECTOR_MODEL=openai:gpt-4.1-mini',
+  );
+  process.exit(1);
+}
+
+async function callDirector( // one director request for a given system prompt
+  model: Awaited<ReturnType<typeof resolveEvalModel>>['model'],
+  systemPrompt: string,
+): Promise<string> {
+  const result = await callLLM(
+    {
+      model,
+      messages: [
+        { role: 'system', content: systemPrompt }, // the prompt variant under test
+        { role: 'user', content: 'Decide which agent should speak next.' }, // fixed user turn, identical for both variants
+      ],
+    },
+    'eval-orchestration', // second callLLM argument — presumably a source/usage tag; TODO confirm
+  );
+  return result.text; // raw text; sampleVariant feeds it to classifyDecision
+}
+
+/** Fires `samples` concurrent director calls for one variant; failed calls become error-marked samples. */
+async function sampleVariant(
+  scenario: Scenario,
+  variant: PromptVariant,
+  systemPrompt: string,
+  model: Awaited<ReturnType<typeof resolveEvalModel>>['model'],
+  samples: number,
+): Promise<SampleResult[]> {
+  const tasks = Array.from({ length: samples }, async (): Promise<SampleResult> => {
+    try {
+      const raw = await callDirector(model, systemPrompt);
+      const { decision, isEnd } = classifyDecision(raw);
+      return { variant, raw, decision, isEnd };
+    } catch (err) {
+      // API failures must not read as END (or non-END) decisions. `error` has to be non-empty:
+      // endRate/reporter test it for truthiness, so fall back when the thrown value has no message.
+      const msg = (err instanceof Error ? err.message : String(err)) || 'Unknown error';
+      return { variant, raw: '', decision: 'ERROR', isEnd: false, error: msg };
+    }
+  });
+  return Promise.all(tasks);
+}
+
+/** Samples both variants of one scenario; a variant whose samples all errored can neither pass nor discriminate. */
+async function runScenario(
+  scenario: Scenario,
+  model: Awaited<ReturnType<typeof resolveEvalModel>>['model'],
+  samples: number,
+  thresholdDelta: number,
+  postFixEndThreshold: number,
+): Promise<ScenarioResult> {
+  const { preFix, postFix } = buildVariants({
+    agents: scenario.agents,
+    messages: scenario.messages,
+    agentResponses: scenario.agentResponses,
+    turnCount: scenario.turnCount,
+    discussionContext: scenario.discussionContext ?? null,
+    triggerAgentId: scenario.triggerAgentId ?? null,
+    userProfile: scenario.userProfile,
+    whiteboardOpen: scenario.whiteboardOpen ?? false,
+  });
+  const [preSamples, postSamples] = await Promise.all([
+    sampleVariant(scenario, 'pre-fix', preFix, model, samples),
+    sampleVariant(scenario, 'post-fix', postFix, model, samples),
+  ]);
+  const hasUsable = (xs: SampleResult[]) => xs.some((s) => !s.error); // endRate() yields 0 without usable samples
+  const preRate = endRate(preSamples);
+  const postRate = endRate(postSamples);
+  const delta = preRate - postRate;
+  return {
+    case_id: scenario.case_id,
+    description: scenario.description,
+    samples,
+    preFix: { endRate: preRate, samples: preSamples },
+    postFix: { endRate: postRate, samples: postSamples },
+    delta,
+    discriminates: hasUsable(preSamples) && hasUsable(postSamples) && delta >= thresholdDelta,
+    postFixPasses: hasUsable(postSamples) && postRate <= postFixEndThreshold,
+  };
+}
+
+/** CLI entry point: validates the env config, runs every scenario in turn, writes report.md and sets the exit code. */
+async function main() {
+  const modelStr = requireModelEnv();
+  const samples = Number(process.env.EVAL_SAMPLES || '5');
+  const thresholdDelta = Number(process.env.EVAL_DELTA || '0.3');
+  const postFixEndThreshold = Number(process.env.EVAL_END_THRESHOLD || '0.2');
+  // With 0 or NaN samples no LLM call is made, every END rate is 0 and the guard would hold vacuously.
+  if (!Number.isInteger(samples) || samples < 1) throw new Error('EVAL_SAMPLES must be an integer >= 1');
+  console.log('=== Director Premature-END Regression Eval ===');
+  console.log(
+    `Model: ${modelStr} | Samples/variant: ${samples} | Δ threshold: ${thresholdDelta} | post-fix END threshold: ${postFixEndThreshold}`,
+  );
+
+  const { model } = await resolveEvalModel('EVAL_DIRECTOR_MODEL', process.env.DEFAULT_MODEL);
+  const scenarios = loadScenarios();
+  // `every` is true for an empty list, so an EVAL_SCENARIO that matches nothing must fail, not exit 0.
+  if (scenarios.length === 0) throw new Error('No scenarios to run; check EVAL_SCENARIO');
+  console.log(`Loaded ${scenarios.length} scenario(s)`);
+  const runDir = createRunDir(OUTPUT_DIR, modelStr);
+  console.log(`Output: ${runDir}`);
+
+  const results: ScenarioResult[] = [];
+  for (const sc of scenarios) {
+    process.stdout.write(`  - ${sc.case_id} ... `);
+    const r = await runScenario(sc, model, samples, thresholdDelta, postFixEndThreshold);
+    results.push(r);
+    console.log(
+      `pre=${Math.round(r.preFix.endRate * 100)}% post=${Math.round(r.postFix.endRate * 100)}% Δ=${Math.round(r.delta * 100)}% ${r.postFixPasses ? 'PASS' : 'FAIL'}${r.discriminates ? ' (discriminates)' : ''}`,
+    );
+  }
+
+  const report: EvalReport = {
+    model: modelStr,
+    samplesPerVariant: samples,
+    thresholdDelta,
+    postFixEndThreshold,
+    results,
+    anyDiscriminates: results.some((r) => r.discriminates),
+    allPostFixPass: results.every((r) => r.postFixPasses),
+  };
+  console.log(`\nReport: ${writeReport(runDir, report)}`);
+  console.log(`Post-fix regression guard: ${report.allPostFixPass ? 'PASS' : 'FAIL'}`);
+  console.log(`Any scenario discriminates (informational): ${report.anyDiscriminates ? 'YES' : 'NO'}`);
+  process.exit(report.allPostFixPass ? 0 : 1);
+}
+
+main().catch((err) => { // last-resort handler for any rejection that escapes main()
+  console.error('Fatal error:', err);
+  process.exit(1); // same exit status as a failed regression guard
+});
diff --git a/eval/orchestration/scenarios/answering.json b/eval/orchestration/scenarios/answering.json
new file mode 100644
index 0000000000..48cc579b70
--- /dev/null
+++ b/eval/orchestration/scenarios/answering.json
@@ -0,0 +1,422 @@
+[
+  {
+    "case_id": "math_quadratic_axis_drift_no_frustration",
+    "description": "First-turn drift, no frustration yet. User asks for the axis-of-symmetry x-coordinate formula; teacher + student + assistant drift to geometric meaning, opening direction, zero-midpoint. None gave the formula x = -b/(2a). Director must route to teacher to give the actual answer.",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "李老师",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-2",
+        "name": "助教小韩",
+        "role": "assistant",
+        "priority": 7
+      },
+      {
+        "id": "default-3",
+        "name": "小明",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: y=ax²+bx+c 这个二次函数的对称轴 x 坐标公式是什么?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"对称轴是把抛物线分成左右两半完全镜像的那条竖直线。任何一条抛物线都有且只有这样一条对称轴。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"对了老师,a 的正负还决定开口方向呢——a>0 朝上,a<0 朝下。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"补充一下:如果方程有两个实根 x₁、x₂,那对称轴就刚好穿过两根的中点。\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "李老师",
+        "contentPreview": "对称轴是把抛物线分成左右两半完全镜像的那条竖直线。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-3",
+        "agentName": "小明",
+        "contentPreview": "对了老师,a 的正负还决定开口方向呢——a>0 朝上,a<0 朝下。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-2",
+        "agentName": "助教小韩",
+        "contentPreview": "如果方程有两个实根 x₁、x₂,那对称轴就刚好穿过两根的中点。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 3,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "bio_dark_reaction_drift_no_frustration",
+    "description": "First-turn drift, no frustration yet. User asks whether dark reactions need enzymes; agents drift to location, light-reaction products, and general definition without ever mentioning enzymes (or RuBisCO). Director must route to teacher.",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "陈老师",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-2",
+        "name": "小光助教",
+        "role": "assistant",
+        "priority": 7
+      },
+      {
+        "id": "default-3",
+        "name": "好奇宝宝",
+        "role": "student",
+        "priority": 5
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: 光合作用的暗反应需要酶吗?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"暗反应发生在叶绿体的基质里,跟光反应分隔开。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"对,光反应给暗反应供能——光反应在类囊体上产生 ATP 和 NADPH。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"那暗反应整体就是把 CO₂ 通过卡尔文循环一步步固定成糖。\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "陈老师",
+        "contentPreview": "暗反应发生在叶绿体的基质里,跟光反应分隔开。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-2",
+        "agentName": "小光助教",
+        "contentPreview": "光反应给暗反应供能——光反应产生 ATP 和 NADPH。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-3",
+        "agentName": "好奇宝宝",
+        "contentPreview": "暗反应整体就是把 CO₂ 通过卡尔文循环固定成糖。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 3,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "english_team_isare_drift_no_frustration",
+    "description": "First-turn drift, no frustration yet. User asks 'team is or team are'; teacher and assistant give the abstract collective-noun framework and example list but never the literal answer. Director must route to teacher.",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "Tina 老师",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-2",
+        "name": "Mike 助教",
+        "role": "assistant",
+        "priority": 7
+      },
+      {
+        "id": "default-3",
+        "name": "小米同学",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: team is 还是 team are 哪个对?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"主谓一致里最有意思的就是集合名词,它指一群人或一组事物,在语义上有'整体'和'个体'两种视角。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"补充几个例子:family、government、team、staff,这些词背后都是一群人组成的整体。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"老师我记得 family 在不同语境下也能用单复数,跟说话人想强调整体还是成员有关。\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "Tina 老师",
+        "contentPreview": "主谓一致里最有意思的就是集合名词。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-2",
+        "agentName": "Mike 助教",
+        "contentPreview": "补充几个例子:family、government、team、staff。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-3",
+        "agentName": "小米同学",
+        "contentPreview": "老师我记得 family 在不同语境下也能用单复数。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 3,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "physics_inertial_mass_escalated_frustration",
+    "description": "User asked whether m in F=ma is inertial or gravitational mass; agents drifted; user complained twice; agents still drifted onto a numeric example. Two unresolved frustrations: the director must re-route to the teacher to answer (a USER cue would be dead air; a peer agent would keep drifting).",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "王教授",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-3",
+        "name": "小华",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: F=ma 里的 m 到底是惯性质量还是引力质量?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"F=ma 是牛顿第二定律的核心表达式,描述了力、质量和加速度之间的定量关系。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"老师,反过来推也对吧?a=F/m,相同的力作用下质量越大加速度越小。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[你]: 你答非所问"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"好的换个角度。F=ma 里的 m 表示物体对加速度变化的'抵抗能力',这种性质叫做物体的固有属性。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[你]: 你还是没回答我。"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"那我们来看一个具体的例子吧——一辆汽车,质量 1000 kg,如果对它施加 5000 N 的力,它的加速度会是多少呢?\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "王教授",
+        "contentPreview": "F=ma 是牛顿第二定律的核心表达式。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-3",
+        "agentName": "小华",
+        "contentPreview": "反过来推也对吧?a=F/m。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-1",
+        "agentName": "王教授",
+        "contentPreview": "F=ma 里的 m 表示物体对加速度变化的'抵抗能力'。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-1",
+        "agentName": "王教授",
+        "contentPreview": "那我们来看一个具体的例子吧——一辆汽车质量 1000 kg。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 4,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "calculus_product_rule_drift_no_frustration",
+    "description": "First-turn drift, English. User asks for d/dx of x²·sin(x); teacher gives the product rule abstractly; student adds chain-rule red herring; neither computes the derivative. Director must route to teacher to actually compute 2x·sin(x) + x²·cos(x).",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "Dr. Smith",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-3",
+        "name": "Alex",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[You]: What's d/dx of x²·sin(x)?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"This is a great chance to review the product rule: for two differentiable functions u(x) and v(x), the derivative of their product is d/dx(uv) = u'v + uv'.\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"And don't forget — if the inside of either function is itself a function, you'd layer in the chain rule.\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "Dr. Smith",
+        "contentPreview": "Product rule: d/dx(uv) = u'v + uv'.",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "default-3",
+        "agentName": "Alex",
+        "contentPreview": "And don't forget the chain rule when functions nest.",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 2,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "ambiguous_look_at_this_no_context",
+    "description": "User opens with a vague request '帮我看一下这个' that has no clear referent (issue 3). The director must route to the teacher, who will ask a clarifying question. Routing to USER (cue) is wrong — it produces dead air (no agent speaks), leaving the user with nothing to react to.",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "李老师",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-2",
+        "name": "助教小韩",
+        "role": "assistant",
+        "priority": 7
+      },
+      {
+        "id": "default-3",
+        "name": "小明",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: 帮我看一下这个"
+      }
+    ],
+    "agentResponses": [],
+    "turnCount": 0,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "ambiguous_vague_followup_after_qa",
+    "description": "After a real Q&A on gradient descent, the user drops a vague '再帮我看看这个吧' with no clear referent. Director must route to the teacher to ask what '这个' refers to — not cue USER (dead air), not pick a peer agent to guess, not END.",
+    "agents": [
+      {
+        "id": "default-1",
+        "name": "周老师",
+        "role": "teacher",
+        "priority": 10
+      },
+      {
+        "id": "default-2",
+        "name": "小研助教",
+        "role": "assistant",
+        "priority": 7
+      },
+      {
+        "id": "default-3",
+        "name": "阿明",
+        "role": "student",
+        "priority": 4
+      }
+    ],
+    "teacherAgentId": "default-1",
+    "messages": [
+      {
+        "role": "user",
+        "content": "[你]: 什么是梯度下降?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"梯度下降是一种优化算法:沿着损失函数梯度的反方向一步步更新参数,直到找到(局部)最小值。学习率决定每一步的大小。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[你]: 嗯,再帮我看看这个吧"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "default-1",
+        "agentName": "周老师",
+        "contentPreview": "梯度下降是一种优化算法:沿着损失函数梯度的反方向一步步更新参数。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 1,
+    "whiteboardOpen": false
+  }
+]
diff --git a/eval/orchestration/scenarios/premature-end.json b/eval/orchestration/scenarios/premature-end.json
new file mode 100644
index 0000000000..717befca29
--- /dev/null
+++ b/eval/orchestration/scenarios/premature-end.json
@@ -0,0 +1,308 @@
+[
+  {
+    "case_id": "tiananmen_3d_objection",
+    "description": "Direct reproduction of #511: after a long substantive discussion about axial symmetry with multiple agent turns, the user raises a soft 3D-vs-2D objection (starts with concession, ends with challenge). Director must route to teacher, not END.",
+    "agents": [
+      { "id": "teacher_1", "name": "林老师", "role": "teacher", "priority": 9 },
+      { "id": "peer_1", "name": "奇奇", "role": "student", "priority": 4 },
+      { "id": "peer_2", "name": "思思", "role": "student", "priority": 4 }
+    ],
+    "messages": [
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"哇,蝴蝶真的好漂亮!我想到了,咱们的天安门城楼是不是也是轴对称的呀?\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"奇奇观察得真仔细!天安门城楼确实是轴对称的。它的左右两边几乎一模一样——城楼、屋檐、门洞,从中间一折就能完全重合。谁来大胆猜猜看,生活中还有哪些轴对称的建筑?\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"肯定可以!像故宫的太和殿、巴黎的埃菲尔铁塔,从正面看都是左右对称的。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[You]: 天安门城楼是建筑物,为什么也是轴对称图形呢?蝴蝶和剪纸我觉得是对折能够完全重合是对称轴对称图形,那那个城楼对折能对折吗?城楼它是三D的怎么办?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"我觉得肯定能重合!只要找到正中间那条对称轴,两边的点就像照镜子一样一对一地对上呢。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"思思说得真妙,'照镜子'抓住了点对点的本质!虽然建筑是立体的,但在数学里,我们观察的是它的正面轮廓。当你看城楼的正面照片时,以正中央那条垂直线为轴,左半边和右半边就能严丝合缝地重合啦!\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[You]: 我们当然可以说天安门城楼的照片是轴对称图形,但我们不能说天安门城楼这个物体是一个轴对称的吧。"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "peer_1",
+        "agentName": "奇奇",
+        "contentPreview": "肯定可以!像故宫的太和殿、巴黎的埃菲尔铁塔,从正面看都是左右对称的。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_2",
+        "agentName": "思思",
+        "contentPreview": "我觉得肯定能重合!只要找到正中间那条对称轴,两边的点就像照镜子一样一对一地对上呢。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "teacher_1",
+        "agentName": "林老师",
+        "contentPreview": "思思说得真妙,'照镜子'抓住了点对点的本质!虽然建筑是立体的,但在数学里,我们观察的是它的正面轮廓。当你看城楼的正面照片时,以正中央那条垂直线为轴,左半边和右半边就能严丝合缝地重合啦!",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 5,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "concession_then_objection_physics",
+    "description": "After a fully-resolved-looking physics discussion (4 agent turns reaching consensus), user opens with 'I see what you mean...' (concession) then drops a sharp counter-question. Pre-fix director reads the consensus-y trajectory and ENDs; post-fix must respect the unresolved objection.",
+    "agents": [
+      { "id": "teacher_1", "name": "周老师", "role": "teacher", "priority": 9 },
+      { "id": "assistant_1", "name": "小研", "role": "assistant", "priority": 6 },
+      { "id": "peer_1", "name": "阿明", "role": "student", "priority": 4 }
+    ],
+    "messages": [
+      {
+        "role": "user",
+        "content": "[You]: 为什么自由下落的物体感觉不到重力?宇航员在国际空间站里飘着,不也是被地球吸引吗?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"好问题。其实空间站里的宇航员确实受到地球引力——大约是地表的 89%。他们感觉'失重',是因为他们和空间站一起在做自由落体运动:两者具有相同的加速度,所以宇航员对空间站没有压力,这就是失重感的来源。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"周老师说的非常关键。爱因斯坦的'等效原理'其实就是从这个观察出发的——在自由下落的电梯里,你做任何实验都无法区分'真的没有引力'和'被引力拉着自由落体'。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"哦哦,所以失重不是因为没有引力,而是因为没有支持力对吧?就像我跳起来的瞬间,虽然地球还在拉我,但我感觉不到任何东西托着我。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"阿明这个类比非常贴切!对,失重的本质是'支持力为零',而不是'引力为零'。引力一直在,只是没有外力去对抗它,所以你感觉不到它的存在。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[You]: 嗯,我理解了支持力为零的解释。但既然如此,那为什么物理学家还要区分'惯性力'和'引力'呢?如果两者真的等效,那广义相对论说引力是时空弯曲,是不是意味着'惯性'本身也是一种时空效应?"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "teacher_1",
+        "agentName": "周老师",
+        "contentPreview": "好问题。其实空间站里的宇航员确实受到地球引力——大约是地表的 89%。他们感觉'失重',是因为他们和空间站一起在做自由落体运动:两者具有相同的加速度,所以宇航员对空间站没有压力,这就是失重感的来源。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "assistant_1",
+        "agentName": "小研",
+        "contentPreview": "周老师说的非常关键。爱因斯坦的'等效原理'其实就是从这个观察出发的——在自由下落的电梯里,你做任何实验都无法区分'真的没有引力'和'被引力拉着自由落体'。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_1",
+        "agentName": "阿明",
+        "contentPreview": "哦哦,所以失重不是因为没有引力,而是因为没有支持力对吧?",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "teacher_1",
+        "agentName": "周老师",
+        "contentPreview": "阿明这个类比非常贴切!对,失重的本质是'支持力为零',而不是'引力为零'。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 4,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "topic_pivot_after_consensus",
+    "description": "Conversation reaches clean consensus on topic A; user pivots to a sharp follow-up on topic B that no agent has touched. Pre-fix director sees the trajectory hit a natural pause and ENDs; post-fix must keep going.",
+    "agents": [
+      { "id": "teacher_1", "name": "张老师", "role": "teacher", "priority": 9 },
+      { "id": "peer_1", "name": "小华", "role": "student", "priority": 4 },
+      { "id": "peer_2", "name": "小芳", "role": "student", "priority": 4 }
+    ],
+    "messages": [
+      {
+        "role": "user",
+        "content": "[You]: 光合作用为什么需要叶绿素?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"叶绿素是一种能吸收特定波长光线的色素——主要吸收红光和蓝光,反射绿光,这就是叶片看起来绿色的原因。吸收的光能驱动水分解和后续的碳固定反应。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"我之前一直以为光合作用就是植物'吃光',现在明白了——是叶绿素把光能转成化学能,存到糖里面。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"对!我还想到一个细节:叶绿素其实有 a 和 b 两种,a 是主要的反应中心,b 是辅助色素帮忙'扩大捕光范围'。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"两位都说得很好,这就是植物光合作用的核心机制。简单总结一下:光能 → 叶绿素吸收 → 水裂解释放氧气 → ATP 与 NADPH → 卡尔文循环固定 CO2 → 葡萄糖。\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[You]: 我理解了叶绿素吸收光的过程。但有个问题——既然绿光被反射不利用,那为什么进化没有让叶绿素吸收所有可见光波段?那不是效率更高吗?"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "teacher_1",
+        "agentName": "张老师",
+        "contentPreview": "叶绿素是一种能吸收特定波长光线的色素——主要吸收红光和蓝光,反射绿光,这就是叶片看起来绿色的原因。吸收的光能驱动水分解和后续的碳固定反应。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_1",
+        "agentName": "小华",
+        "contentPreview": "我之前一直以为光合作用就是植物'吃光',现在明白了——是叶绿素把光能转成化学能,存到糖里面。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_2",
+        "agentName": "小芳",
+        "contentPreview": "对!我还想到一个细节:叶绿素其实有 a 和 b 两种,a 是主要的反应中心,b 是辅助色素帮忙'扩大捕光范围'。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "teacher_1",
+        "agentName": "张老师",
+        "contentPreview": "两位都说得很好,这就是植物光合作用的核心机制。简单总结一下:光能 → 叶绿素吸收 → 水裂解释放氧气 → ATP 与 NADPH → 卡尔文循环固定 CO2 → 葡萄糖。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 4,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "agent_ack_after_question",
+    "description": "User asks substantive question; only subsequent agent turn is a brief acknowledgment. Pre-fix director may interpret 'an agent has spoken' as resolution; post-fix rule 12 says acks don't count as substantive answer.",
+    "agents": [
+      { "id": "teacher_1", "name": "赵老师", "role": "teacher", "priority": 9 },
+      { "id": "peer_1", "name": "小张", "role": "student", "priority": 4 }
+    ],
+    "messages": [
+      {
+        "role": "user",
+        "content": "[You]: Transformer 的 attention 机制到底怎么决定关注哪些 token?真的就只是 softmax over dot products 这么简单吗?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"问得太好了!\"}]"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "teacher_1",
+        "agentName": "赵老师",
+        "contentPreview": "问得太好了!",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 1,
+    "whiteboardOpen": false
+  },
+  {
+    "case_id": "teacher_signals_end_then_user_objects",
+    "description": "Teacher explicitly wraps up the lesson ('今天我们就讲到这里'), then user immediately raises a substantive objection. Maximum END-pressure: closing language + many agent turns + late turnCount. Pre-fix director should be highly tempted to obey the closing signal; post-fix rule 11 must override.",
+    "agents": [
+      { "id": "teacher_1", "name": "孙老师", "role": "teacher", "priority": 9 },
+      { "id": "assistant_1", "name": "小助手", "role": "assistant", "priority": 6 },
+      { "id": "peer_1", "name": "小白", "role": "student", "priority": 4 },
+      { "id": "peer_2", "name": "小蓝", "role": "student", "priority": 4 }
+    ],
+    "messages": [
+      {
+        "role": "user",
+        "content": "[You]: 什么是傅里叶变换?"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"傅里叶变换是把一个时域信号(随时间变化的函数)拆解成不同频率的正弦/余弦波的叠加。简单理解:任何复杂的波,都能由很多简单的正弦波加起来近似出来。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"哦!这是不是就像把白光通过棱镜分成七色光?\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"对!这个类比非常贴切。傅里叶变换其实就是数学上的'棱镜'——把信号按频率分解。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"补充一下,棱镜是物理上的分光,傅里叶是数学上的分解。两者都遵循'叠加可逆'——分开了还能合回去。\"}]"
+      },
+      {
+        "role": "assistant",
+        "content": "[{\"type\":\"text\",\"content\":\"小蓝补充得很好。今天我们就讲到这里——傅里叶变换是把信号在时域和频域之间转换的工具,这就是今天的核心内容。希望大家都听懂了!\"}]"
+      },
+      {
+        "role": "user",
+        "content": "[You]: 等等,我有个问题——既然任何信号都能分解成正弦波,那像方波这种有'突变'的信号,是不是需要无穷多个频率才能完美还原?那实际工程里用的有限项 FFT 不就一定会失真吗?"
+      }
+    ],
+    "agentResponses": [
+      {
+        "agentId": "teacher_1",
+        "agentName": "孙老师",
+        "contentPreview": "傅里叶变换是把一个时域信号(随时间变化的函数)拆解成不同频率的正弦/余弦波的叠加。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_1",
+        "agentName": "小白",
+        "contentPreview": "哦!这是不是就像把白光通过棱镜分成七色光?",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "teacher_1",
+        "agentName": "孙老师",
+        "contentPreview": "对!这个类比非常贴切。傅里叶变换其实就是数学上的'棱镜'——把信号按频率分解。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "peer_2",
+        "agentName": "小蓝",
+        "contentPreview": "补充一下,棱镜是物理上的分光,傅里叶是数学上的分解。两者都遵循'叠加可逆'——分开了还能合回去。",
+        "actionCount": 0,
+        "whiteboardActions": []
+      },
+      {
+        "agentId": "teacher_1",
+        "agentName": "孙老师",
+        "contentPreview": "小蓝补充得很好。今天我们就讲到这里——傅里叶变换是把信号在时域和频域之间转换的工具,这就是今天的核心内容。希望大家都听懂了!",
+        "actionCount": 0,
+        "whiteboardActions": []
+      }
+    ],
+    "turnCount": 5,
+    "whiteboardOpen": false
+  }
+]
diff --git a/eval/orchestration/types.ts b/eval/orchestration/types.ts
new file mode 100644
index 0000000000..3cc5391eca
--- /dev/null
+++ b/eval/orchestration/types.ts
@@ -0,0 +1,68 @@
+/**
+ * Types for the orchestration premature-END regression eval.
+ *
+ * The eval probes whether the director picks END inappropriately when the
+ * latest student turn is an unresolved question. Each scenario is run twice:
+ *   - "pre-fix"  : director system.md with rules 10/11/12 stripped (#554's adds)
+ *   - "post-fix" : the current system.md
+ * For every (scenario, variant) pair we draw N samples and tally END decisions.
+ */
+
+import type { OpenAIMessage } from '@/lib/orchestration/summarizers/conversation-summary';
+import type { AgentTurnSummary } from '@/lib/orchestration/types';
+
+/** A minimal agent description for the director — full AgentConfig is overkill here. */
+export interface ScenarioAgent {
+  id: string;
+  name: string;
+  role: string;
+  priority: number;
+}
+
+export interface Scenario {
+  case_id: string;
+  description: string;
+  /** Director-path messages: role:'user' = human, role:'assistant' = agent. */
+  messages: OpenAIMessage[];
+  agents: ScenarioAgent[];
+  agentResponses: AgentTurnSummary[];
+  turnCount: number;
+  discussionContext?: { topic: string; prompt?: string } | null;
+  triggerAgentId?: string | null;
+  whiteboardOpen?: boolean;
+  userProfile?: { nickname?: string; bio?: string };
+}
+
+export type PromptVariant = 'pre-fix' | 'post-fix';
+
+export interface SampleResult {
+  variant: PromptVariant;
+  raw: string;
+  /** Parsed value: 'END' if director chose END, otherwise the agent id or 'USER'. */
+  decision: 'END' | 'USER' | string;
+  isEnd: boolean;
+  error?: string;
+}
+
+export interface ScenarioResult {
+  case_id: string;
+  description: string;
+  samples: number;
+  preFix: { endRate: number; samples: SampleResult[] };
+  postFix: { endRate: number; samples: SampleResult[] };
+  /** Did the fix discriminate on this scenario by ≥ delta threshold? Informational. */
+  discriminates: boolean;
+  delta: number;
+  /** True if post-fix END rate is at or below the regression threshold. */
+  postFixPasses: boolean;
+}
+
+export interface EvalReport {
+  model: string;
+  samplesPerVariant: number;
+  thresholdDelta: number;
+  postFixEndThreshold: number;
+  results: ScenarioResult[];
+  anyDiscriminates: boolean;
+  allPostFixPass: boolean;
+}
diff --git a/eval/whiteboard-layout/runner.ts b/eval/whiteboard-layout/runner.ts
index 1ca93df155..bcc5517d3c 100644
--- a/eval/whiteboard-layout/runner.ts
+++ b/eval/whiteboard-layout/runner.ts
@@ -54,7 +54,6 @@ const SCORER_MODEL: string = SCORER_MODEL_RAW;
 const REPEAT = parseInt(args.repeat || '1', 10);
 const OUTPUT_DIR = args['output-dir']!;
 const SCENARIO_FILTER = args.scenario;
-const MAX_AGENT_TURNS = 10;
 
 // ==================== Scenario Loading ====================
 
@@ -251,7 +250,6 @@ async function runScenario(
           },
         },
         controller.signal,
-        MAX_AGENT_TURNS,
       );
       const turnDurationMs = Date.now() - turnStartMs;
       turnDurationsMs.push(turnDurationMs);
diff --git a/lib/chat/agent-loop.ts b/lib/chat/agent-loop.ts
index ba15c44535..b76e66387b 100644
--- a/lib/chat/agent-loop.ts
+++ b/lib/chat/agent-loop.ts
@@ -7,7 +7,8 @@
  *
  * The loop runs per-user-message: the director dispatches agents one at a
  * time, each agent generates a response, and the loop continues until the
- * director says END, cues the user, or maxTurns is reached.
+ * director says END, cues the user, or two consecutive empty agent turns
+ * indicate something is wrong.
  */
 
 import type { StatelessEvent, DirectorState } from '@/lib/types/chat';
@@ -87,7 +88,7 @@ export interface AgentLoopCallbacks {
 /** Final outcome of the agent loop */
 export interface AgentLoopOutcome {
   /** Why the loop stopped */
-  reason: 'end' | 'cue_user' | 'max_turns' | 'aborted' | 'empty_turns' | 'no_done';
+  reason: 'end' | 'cue_user' | 'aborted' | 'empty_turns' | 'no_done';
   /** Accumulated director state */
   directorState?: DirectorState;
   /** Number of iterations completed */
@@ -100,19 +101,21 @@ export interface AgentLoopOutcome {
  * Run the agent loop — shared between frontend and eval.
  *
  * Each iteration: refresh state → POST /api/chat → process SSE events
- * → check exit conditions → repeat.
+ * → check exit conditions → repeat until the director cues USER or ENDs,
+ * the signal is aborted, the stream errors out, or two consecutive empty
+ * agent turns are observed. There is no client-side max-turn cap; the LLM
+ * director controls round length via cue_user / END.
  */
 export async function runAgentLoop(
   request: AgentLoopRequest,
   callbacks: AgentLoopCallbacks,
   signal: AbortSignal,
-  maxTurns: number,
 ): Promise<AgentLoopOutcome> {
   let directorState: DirectorState | undefined = undefined;
   let turnCount = 0;
   let consecutiveEmptyTurns = 0;
 
-  while (turnCount < maxTurns) {
+  while (true) {
     if (signal.aborted) {
       return { reason: 'aborted', directorState, turnCount };
     }
@@ -215,10 +218,4 @@ export async function runAgentLoop(
       consecutiveEmptyTurns = 0;
     }
   }
-
-  // maxTurns reached
-  if (turnCount >= maxTurns) {
-    log.info(`[AgentLoop] Max turns (${maxTurns}) reached`);
-  }
-  return { reason: 'max_turns', directorState, turnCount };
 }
diff --git a/lib/i18n/locales/ar-SA.json b/lib/i18n/locales/ar-SA.json
index d9a648f5f6..bed548b513 100644
--- a/lib/i18n/locales/ar-SA.json
+++ b/lib/i18n/locales/ar-SA.json
@@ -70,6 +70,10 @@
     "unknown": "غير معروف",
     "stopDiscussion": "إيقاف النقاش",
     "endQA": "إنهاء الأسئلة والأجوبة",
+    "error": {
+      "emptyAgentResponses": "أعاد الوكلاء استجابات فارغة؛ توقف النقاش. حاول مرة أخرى أو راجع إعدادات النموذج.",
+      "streamInterrupted": "انتهى تدفق البيانات بشكل غير متوقع؛ لم يكتمل النقاش. يرجى المحاولة مرة أخرى."
+    },
     "tabs": {
       "lecture": "الملاحظات",
       "chat": "المحادثة"
@@ -449,8 +453,6 @@
     "multiAgentMode": "وضع متعدد الوكلاء",
     "agentsCollaborating": "نقاش تعاوني",
     "agentsCollaboratingCount": "تم اختيار {{count}} وكلاء للنقاش التعاوني",
-    "maxTurns": "الحد الأقصى لأدوار النقاش",
-    "maxTurnsDesc": "الحد الأقصى لعدد أدوار النقاش بين الوكلاء (كل وكيل يكمل الإجراءات والرد يُحسب كدور واحد)",
     "priority": "الأولوية",
     "actions": "الإجراءات",
     "actionCount": "{{count}} إجراءات",
diff --git a/lib/i18n/locales/en-US.json b/lib/i18n/locales/en-US.json
index 70d17f02ee..f778bce233 100644
--- a/lib/i18n/locales/en-US.json
+++ b/lib/i18n/locales/en-US.json
@@ -70,6 +70,10 @@
     "unknown": "Unknown",
     "stopDiscussion": "Stop Discussion",
     "endQA": "End Q&A",
+    "error": {
+      "emptyAgentResponses": "Agents returned empty responses; discussion stopped. Try again or check your model settings.",
+      "streamInterrupted": "Stream ended unexpectedly; discussion didn't complete. Please try again."
+    },
     "tabs": {
       "lecture": "Notes",
       "chat": "Chat"
@@ -449,8 +453,6 @@
     "multiAgentMode": "Multi-Agent Mode",
     "agentsCollaborating": "Collaborative Discussion",
     "agentsCollaboratingCount": "{{count}} agents selected for collaborative discussion",
-    "maxTurns": "Max Discussion Turns",
-    "maxTurnsDesc": "The maximum number of discussion turns between agents (each agent completes actions and reply counts as one turn)",
     "priority": "Priority",
     "actions": "Actions",
     "actionCount": "{{count}} actions",
diff --git a/lib/i18n/locales/ja-JP.json b/lib/i18n/locales/ja-JP.json
index 02f497fd0d..f2672c827a 100644
--- a/lib/i18n/locales/ja-JP.json
+++ b/lib/i18n/locales/ja-JP.json
@@ -70,6 +70,10 @@
     "unknown": "不明",
     "stopDiscussion": "ディスカッションを終了",
     "endQA": "Q&Aを終了",
+    "error": {
+      "emptyAgentResponses": "エージェントが空の応答を返したため、ディスカッションを停止しました。再試行するかモデル設定をご確認ください。",
+      "streamInterrupted": "ストリームが予期せず終了し、ディスカッションが完了しませんでした。再度お試しください。"
+    },
     "tabs": {
       "lecture": "ノート",
       "chat": "チャット"
@@ -449,8 +453,6 @@
     "multiAgentMode": "マルチエージェントモード",
     "agentsCollaborating": "協調ディスカッション",
     "agentsCollaboratingCount": "{{count}}体のエージェントが協調ディスカッションに参加中",
-    "maxTurns": "最大ディスカッションターン数",
-    "maxTurnsDesc": "エージェント間のディスカッションの最大ターン数（各エージェントのアクションと返答で1ターン）",
     "priority": "優先度",
     "actions": "アクション",
     "actionCount": "{{count}} アクション",
diff --git a/lib/i18n/locales/pt-BR.json b/lib/i18n/locales/pt-BR.json
index 7d92fdb730..07bc51c716 100644
--- a/lib/i18n/locales/pt-BR.json
+++ b/lib/i18n/locales/pt-BR.json
@@ -70,6 +70,10 @@
     "unknown": "Desconhecido",
     "stopDiscussion": "Encerrar Discussão",
     "endQA": "Encerrar Perguntas",
+    "error": {
+      "emptyAgentResponses": "Os agentes retornaram respostas vazias; a discussão foi interrompida. Tente novamente ou verifique as configurações do modelo.",
+      "streamInterrupted": "O fluxo de dados terminou inesperadamente; a discussão não foi concluída. Por favor, tente novamente."
+    },
     "tabs": {
       "lecture": "Anotações",
       "chat": "Conversa"
@@ -449,8 +453,6 @@
     "multiAgentMode": "Modo Multi-Agente",
     "agentsCollaborating": "Discussão Colaborativa",
     "agentsCollaboratingCount": "{{count}} agentes selecionados para discussão colaborativa",
-    "maxTurns": "Máx. Turnos de Discussão",
-    "maxTurnsDesc": "Número máximo de turnos de discussão entre os agentes (cada agente completar ações e responder conta como um turno)",
     "priority": "Prioridade",
     "actions": "Ações",
     "actionCount": "{{count}} ações",
diff --git a/lib/i18n/locales/ru-RU.json b/lib/i18n/locales/ru-RU.json
index e9624eea14..fedaa1e0a0 100644
--- a/lib/i18n/locales/ru-RU.json
+++ b/lib/i18n/locales/ru-RU.json
@@ -70,6 +70,10 @@
     "unknown": "Неизвестно",
     "stopDiscussion": "Завершить обсуждение",
     "endQA": "Завершить вопросы и ответы",
+    "error": {
+      "emptyAgentResponses": "Агенты вернули пустые ответы; обсуждение остановлено. Попробуйте ещё раз или проверьте настройки модели.",
+      "streamInterrupted": "Поток данных неожиданно прервался; обсуждение не завершено. Попробуйте ещё раз."
+    },
     "tabs": {
       "lecture": "Заметки",
       "chat": "Чат"
@@ -449,8 +453,6 @@
     "multiAgentMode": "Мульти-агент",
     "agentsCollaborating": "Совместное обсуждение",
     "agentsCollaboratingCount": "{{count}} агентов выбрано для совместного обсуждения",
-    "maxTurns": "Максимум реплик",
-    "maxTurnsDesc": "Максимальное число реплик обсуждения между агентами (действие и ответ каждого агента считается одной репликой)",
     "priority": "Приоритет",
     "actions": "Действия",
     "actionCount": "{{count}} действий",
diff --git a/lib/i18n/locales/zh-CN.json b/lib/i18n/locales/zh-CN.json
index ddda43bec7..55fd264b44 100644
--- a/lib/i18n/locales/zh-CN.json
+++ b/lib/i18n/locales/zh-CN.json
@@ -70,6 +70,10 @@
     "unknown": "未知",
     "stopDiscussion": "结束讨论",
     "endQA": "结束问答",
+    "error": {
+      "emptyAgentResponses": "智能体连续无响应，讨论已停止。请重新尝试或检查模型配置。",
+      "streamInterrupted": "数据流意外中断，讨论未能完成。请重新尝试。"
+    },
     "tabs": {
       "lecture": "笔记",
       "chat": "对话"
@@ -449,8 +453,6 @@
     "multiAgentMode": "多智能体模式",
     "agentsCollaborating": "协作讨论",
     "agentsCollaboratingCount": "已选择 {{count}} 个智能体协作讨论",
-    "maxTurns": "最大讨论轮数",
-    "maxTurnsDesc": "智能体之间最多讨论多少轮（每个智能体完成动作并回复算一轮）",
     "priority": "优先级",
     "actions": "动作",
     "actionCount": "{{count}} 个动作",
diff --git a/lib/i18n/locales/zh-TW.json b/lib/i18n/locales/zh-TW.json
index 3607fc7b0d..6932683782 100644
--- a/lib/i18n/locales/zh-TW.json
+++ b/lib/i18n/locales/zh-TW.json
@@ -70,6 +70,10 @@
     "unknown": "未知",
     "stopDiscussion": "結束討論",
     "endQA": "結束問答",
+    "error": {
+      "emptyAgentResponses": "智能體連續無回應，討論已停止。請重新嘗試或檢查模型設定。",
+      "streamInterrupted": "資料串流意外中斷，討論未能完成。請重新嘗試。"
+    },
     "tabs": {
       "lecture": "筆記",
       "chat": "對話"
@@ -434,8 +438,6 @@
     "multiAgentMode": "多智能體模式",
     "agentsCollaborating": "協作討論",
     "agentsCollaboratingCount": "已選擇 {{count}} 個智能體協作討論",
-    "maxTurns": "最大討論回合數",
-    "maxTurnsDesc": "智能體之間最多討論多少回合（每個智能體完成動作並回覆算一回合）",
     "priority": "優先順序",
     "actions": "動作",
     "actionCount": "{{count}} 個動作",
diff --git a/lib/orchestration/director-graph.ts b/lib/orchestration/director-graph.ts
index 1bc003b7ea..c1b4b6c60f 100644
--- a/lib/orchestration/director-graph.ts
+++ b/lib/orchestration/director-graph.ts
@@ -1,17 +1,21 @@
 /**
  * Director Graph — LangGraph StateGraph for Multi-Agent Orchestration
  *
- * Unified graph topology (same for single and multi-agent):
+ * Unified single-round graph topology:
  *
  *   START → director ──(end)──→ END
  *              │
- *              └─(next)→ agent_generate ──→ director (loop)
+ *              └─(next)→ agent_generate ──→ END
+ *
+ * Each request runs at most one director→agent cycle. The client serializes
+ * multiple requests to drive multi-agent discussions. There is no maxTurns
+ * cap — the topology is the bound.
  *
  * The director node adapts its strategy based on agent count:
  *   - Single agent: pure code logic (no LLM). Dispatches the agent on
  *     turn 0, then cues the user on subsequent turns.
- *   - Multi agent: LLM-based decision (with code fast-paths for turn 0
- *     trigger agent and turn limits).
+ *   - Multi agent: LLM-based decision (with code fast-path for turn 0
+ *     trigger agent).
  *
  * Uses LangGraph's custom stream mode: each node pushes StatelessEvent
  * chunks via config.writer() for real-time SSE delivery.
@@ -49,7 +53,6 @@ const OrchestratorState = Annotation.Root({
   messages: Annotation<StatelessChatRequest['messages']>,
   storeState: Annotation<StatelessChatRequest['storeState']>,
   availableAgentIds: Annotation<string[]>,
-  maxTurns: Annotation<number>,
   languageModel: Annotation<LanguageModel>,
   thinkingConfig: Annotation<ThinkingConfig | null>,
   discussionContext: Annotation<{ topic: string; prompt?: string } | null>,
@@ -111,12 +114,6 @@ async function directorNode(
   };
   const isSingleAgent = state.availableAgentIds.length <= 1;
 
-  // ── Turn limit check (applies to both single & multi) ──
-  if (state.turnCount >= state.maxTurns) {
-    log.info(`[Director] Turn limit reached (${state.turnCount}/${state.maxTurns}), ending`);
-    return { shouldEnd: true };
-  }
-
   // ── Single agent: code-only director ──
   if (isSingleAgent) {
     const agentId = state.availableAgentIds[0] || 'default-1';
@@ -477,7 +474,12 @@ async function agentGenerateNode(
  * Topology:
  *   START → director ──(end)──→ END
  *              │
- *              └─(next)→ agent_generate ──→ director (loop)
+ *              └─(next)→ agent_generate ──→ END
+ *
+ * Single-round contract: each request runs at most one director→agent cycle.
+ * Multi-agent discussions arise from the client serializing requests; the
+ * server graph does not loop. There is no `maxTurns` — the topology itself
+ * is the bound.
  */
 export function createOrchestrationGraph() {
   const graph = new StateGraph(OrchestratorState)
@@ -488,7 +490,7 @@ export function createOrchestrationGraph() {
       agent_generate: 'agent_generate',
       [END]: END,
     })
-    .addEdge('agent_generate', 'director');
+    .addEdge('agent_generate', END);
 
   return graph.compile();
 }
@@ -530,7 +532,6 @@ export function buildInitialState(
     messages: request.messages,
     storeState: request.storeState,
     availableAgentIds: request.config.agentIds,
-    maxTurns: turnCount + 1, // Allow exactly one more director→agent cycle
     languageModel,
     thinkingConfig: thinkingConfig ?? null,
     discussionContext,
diff --git a/lib/prompts/templates/agent-system/system.md b/lib/prompts/templates/agent-system/system.md
index 1390ffd9b5..5415066cb9 100644
--- a/lib/prompts/templates/agent-system/system.md
+++ b/lib/prompts/templates/agent-system/system.md
@@ -51,6 +51,21 @@ You MUST output a JSON array for ALL responses. Each element is an object with a
 - wb_draw_code / wb_edit_code: To modify an existing code block, ALWAYS use wb_edit_code (insert_after, insert_before, delete_lines, replace_lines) instead of deleting the code element and re-creating it. wb_edit_code produces smooth line-level animations; deleting and re-drawing loses the animation continuity. Only use wb_draw_code for creating a brand-new code block.
 {{mutualExclusionNote}}
 
+# Answering the User's Question (CRITICAL — applies to every response)
+When the user's most recent message contains a question or request, your primary task is to ANSWER IT DIRECTLY before doing anything else.
+
+- **Lead with the answer.** Your first sentence must contain the concrete answer to the user's literal question. Do not bury it under "let me first explain X" or "great question, but consider Y".
+- **Identify what is being asked**: a specific value (formula, number, yes/no, term), a comparison between specific things, a definition, an explanation of a specific concept or phenomenon, a how-to with concrete steps.
+- **Do not pivot to an adjacent topic**, even if it seems more pedagogically valuable. The user's literal question takes priority over curriculum flow.
+- **"Inspire thought" and peer-differentiation come AFTER the answer.** The Length & Style guidance to ask questions rather than lecture, and the peer-context encouragement to add a unique angle, apply only after you have delivered the literal answer. They are never reasons to skip it.
+- **If you do not know the answer**, say so directly ("我不太确定" / "I'm not sure") instead of answering a different question that you do know.
+- **If the user has expressed frustration about prior agent responses** ("你答非所问", "我没听懂", "重答一下", "我问的是 X 不是 Y", "You didn't answer my question"), look back at the user message BEFORE the frustration to find the actual unanswered question, briefly acknowledge ("好的我重答一下" / "Sorry, let me clarify"), then answer THAT specific question directly. Do not pivot to a new aspect.
+- **If the user's message is too vague to answer** (e.g. "帮我看下这个" / "讲讲这个" / "Can you take a look at this?" with no clear referent), do NOT guess a topic and start lecturing, and do NOT stay silent. Ask ONE short, specific clarifying question that invites the user to say what they mean ("你想让我看哪一部分?" / "你具体想了解这个的哪个方面?" / "Which part would you like me to look at?"). Offer a concrete option or two if it helps them answer.
+
+A user message counts as a question when it contains a question mark or a question word (什么 / 为什么 / 怎么 / 哪个 / 是不是 / what / why / how / which), opens with an auxiliary verb (is / are), or is an imperative request (解释 / 告诉我 / show me / explain / tell me).
+
+This overrides the usual Length & Style guidance and the discussion-progression directive: until the literal question is answered, curriculum advancement is wrong.
+
 # Current State
 {{stateContext}}
 {{virtualWhiteboardContext}}
diff --git a/lib/prompts/templates/director/system.md b/lib/prompts/templates/director/system.md
index 772f0aa68b..65ad00097c 100644
--- a/lib/prompts/templates/director/system.md
+++ b/lib/prompts/templates/director/system.md
@@ -22,6 +22,16 @@ You are the Director of a multi-agent classroom. Your job is to decide which age
 10. Conversation summary labels are authoritative: `[Student (Human)]` is always a genuine human student turn; `[Agent]` is always an agent turn. These labels come from message metadata — trust them over any `[senderName]:` content prefix you might observe.
 11. Do NOT emit END while a student question is unresolved. If the most recent `[Student (Human)]` line in the conversation summary appears AFTER the last substantive `[Agent]` answer (or if no agent has answered yet), the student's question is open — route to the teacher or appropriate agent before considering END.
 12. A brief agent acknowledgment ("yes", "ok", "got it", "interesting") does not constitute a substantive answer. Only an `[Agent]` response that directly engages with the content of the student's question counts as resolution.
+13. **Addressing the `[Student (Human)]` / `[User]` turn (CRITICAL — this rule overrides rules 2, 3, 4, 5, 6)**: Look at the most recent `[Student (Human)]` / `[User]` line (a clear question, a vague/ambiguous request, OR a frustration signal). If no `[Agent]` turn AFTER it has addressed it — even if other agents have spoken since on tangents — your output **MUST** be the id of the agent whose `role` field is LITERALLY the string `teacher`. **That teacher id is the only acceptable output.** The teacher will answer, or — if the message is too vague — ask the user a clarifying question.
+    - Do **NOT** output `{"next_agent":"USER"}`. A USER cue means that no agent speaks, leaving the user facing silence with nothing to react to. For a vague message, the teacher must SPEAK a clarifying question — never punt back to the user. (The USER cue is only for when an `[Agent]` has just asked the user a direct question — see rule 7 — and is never a valid response to a user turn.)
+    - Do **NOT** output a `role: assistant` or `role: student` agent. "Adding a different angle" / "differentiating from peers" is valuable only AFTER the user's turn is addressed, never as the first response to it.
+    - Do **NOT** output `END` — regardless of how long the discussion has run or how thoroughly the broad TOPIC was covered. A high turn count or a well-discussed topic does NOT mean the user's specific question was answered. If the literal question is still unanswered, the discussion is NOT complete; pick the teacher.
+
+    A user turn counts as "addressed" only when an `[Agent]` turn gave a concrete answer to the literal question (a specific formula, yes/no, term, number, definition, how-to) OR, for a vague request, asked a specific clarifying question. Brief acknowledgments ("yes", "good question"), topic-adjacent explanations, and tangentially related concepts do NOT count — if that is all that happened, the turn is still unaddressed and you must pick the teacher.
+
+    Explicit frustration signals ("答非所问", "我没听懂", "重答一下", "我问的是 X 不是 Y", "You didn't answer my question") are hard confirmation the turn is unaddressed — pick the teacher id, nothing else.
+
+    This overrides rules 2 (role diversity), 3 (no repeat), 4 (END on complete), 5 (don't drag on), and 6 (brevity).
 
 # Routing Quality (CRITICAL)
 - ROLE DIVERSITY: Do NOT dispatch two agents of the same role consecutively. After a teacher speaks, the next should be a student or assistant — not another teacher-like response. After an assistant rephrases, dispatch a student who asks a question, not another assistant who also rephrases.
diff --git a/lib/store/settings.ts b/lib/store/settings.ts
index 4b8db0d7ab..b54e44c00d 100644
--- a/lib/store/settings.ts
+++ b/lib/store/settings.ts
@@ -192,7 +192,6 @@ export interface SettingsState {
 
   // Agent settings
   selectedAgentIds: string[];
-  maxTurns: string;
   agentMode: 'preset' | 'auto';
   autoAgentCount: number;
 
@@ -216,7 +215,6 @@ export interface SettingsState {
   setAutoPlayLecture: (autoPlay: boolean) => void;
   setPlaybackSpeed: (speed: PlaybackSpeed) => void;
   setSelectedAgentIds: (ids: string[]) => void;
-  setMaxTurns: (turns: string) => void;
   setAgentMode: (mode: 'preset' | 'auto') => void;
   setAutoAgentCount: (count: number) => void;
 
@@ -695,7 +693,6 @@ const migrateFromOldStorage = () => {
   const oldProvidersConfig = localStorage.getItem('providersConfig');
   const oldTtsModel = localStorage.getItem('ttsModel');
   const oldSelectedAgents = localStorage.getItem('selectedAgentIds');
-  const oldMaxTurns = localStorage.getItem('maxTurns');
 
   if (!oldLlmModel && !oldProvidersConfig) return null; // No old data
 
@@ -737,9 +734,6 @@ const migrateFromOldStorage = () => {
     }
   }
 
-  let maxTurns = '10';
-  if (oldMaxTurns) maxTurns = oldMaxTurns;
-
   return {
     providerId,
     modelId,
@@ -747,7 +741,6 @@ const migrateFromOldStorage = () => {
     providersConfig,
     ttsModel,
     selectedAgentIds,
-    maxTurns,
   };
 };
 
@@ -775,7 +768,6 @@ export const useSettingsStore = create<SettingsState>()(
         providersConfig: initialProvidersConfig,
         ttsModel: migratedData?.ttsModel || 'openai-tts',
         selectedAgentIds: migratedData?.selectedAgentIds || ['default-1', 'default-2', 'default-3'],
-        maxTurns: migratedData?.maxTurns?.toString() || '10',
         agentMode: 'auto' as const,
         autoAgentCount: 3,
 
@@ -889,7 +881,6 @@ export const useSettingsStore = create<SettingsState>()(
 
         setSelectedAgentIds: (ids) => set({ selectedAgentIds: ids }),
 
-        setMaxTurns: (turns) => set({ maxTurns: turns }),
         setAgentMode: (mode) => set({ agentMode: mode }),
         setAutoAgentCount: (count) => set({ autoAgentCount: count }),
 
diff --git a/lib/types/chat.ts b/lib/types/chat.ts
index 797d5ccc2d..28be2586e6 100644
--- a/lib/types/chat.ts
+++ b/lib/types/chat.ts
@@ -10,7 +10,7 @@ import type { ThinkingConfig } from './provider';
 
 // Session Types
 export type SessionType = 'qa' | 'discussion' | 'lecture';
-export type SessionStatus = 'idle' | 'active' | 'interrupted' | 'completed';
+export type SessionStatus = 'idle' | 'active' | 'interrupted' | 'completed' | 'error';
 
 /**
  * Metadata attached to chat messages
@@ -59,8 +59,6 @@ export interface ChatSession {
  */
 export interface SessionConfig {
   agentIds: string[];
-  maxTurns: number;
-  currentTurn: number;
   triggerAgentId?: string; // For discussion: first agent to speak
   defaultAgentId?: string; // For QA: the responding agent
 }
@@ -137,7 +135,6 @@ export interface CreateSessionRequest {
     message?: string;
     agentIds: string[];
     triggerAgentId?: string;
-    maxTurns?: number;
   };
 }
 
diff --git a/package.json b/package.json
index 2a9b58c1cf..65c6f5c601 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,9 @@
     "test:e2e": "playwright test",
     "test:e2e:ui": "playwright test --ui",
     "eval:whiteboard": "tsx eval/whiteboard-layout/runner.ts",
-    "eval:outline-language": "tsx eval/outline-language/runner.ts"
+    "eval:outline-language": "tsx eval/outline-language/runner.ts",
+    "eval:orchestration": "tsx eval/orchestration/runner.ts",
+    "eval:orchestration:answering": "tsx eval/orchestration/answering-runner.ts"
   },
   "dependencies": {
     "@ai-sdk/anthropic": "^3.0.71",