diff --git a/app/api/scout/discover/route.ts b/app/api/scout/discover/route.ts
new file mode 100644
index 0000000..6767dfe
--- /dev/null
+++ b/app/api/scout/discover/route.ts
@@ -0,0 +1,69 @@
+import { NextRequest, NextResponse } from "next/server"
+import { generateObject } from "ai"
+import { google } from "@ai-sdk/google"
+import { z } from "zod"
+
+/** One candidate company suggested by the model for a territory query. */
+const CompanySchema = z.object({
+  name: z.string().describe("Company name"),
+  // The description asks the model to always include the scheme.
+  url: z.string().describe("Company website URL including https://"),
+  reason: z
+    .string()
+    .describe("One sentence: why this company is relevant to the query"),
+})
+
+/**
+ * Structured-output contract handed to `generateObject`: an object whose
+ * `companies` field lists the suggested companies.
+ */
+const DiscoverSchema = z.object({ companies: z.array(CompanySchema) })
+
+/**
+ * POST /api/scout/discover — asks Gemini for companies matching a territory
+ * query. Expects a JSON body `{ query: string }`. Responds with
+ * `{ companies }` on success, 400 for unparseable JSON or a missing/blank
+ * query, and 500 with the error message if generation fails.
+ */
+export async function POST(req: NextRequest) {
+  let body: unknown
+  try {
+    body = await req.json()
+  } catch {
+    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 })
+  }
+
+  // `null` is valid JSON, and destructuring it would throw; optional chaining
+  // turns such bodies into a plain 400 instead.
+  const query = (body as { query?: unknown } | null)?.query
+  if (typeof query !== "string" || query.trim().length === 0) {
+    return NextResponse.json({ error: "query is required" }, { status: 400 })
+  }
+
+  try {
+    const { object } = await generateObject({
+      model: google("gemini-2.5-flash"),
+      schema: DiscoverSchema,
+      prompt: `You are a Vercel enterprise sales researcher. Given a territory query, return a list of real companies with their actual website URLs.
+
+QUERY: "${query.trim()}"
+
+RULES:
+- Return 20-30 companies that match the query.
+- Every URL must be a real, publicly accessible website. Use https://.
+- Use the company's primary marketing/product website, not social media or app store links.
+- Focus on companies that are likely to have a web presence worth analysing (e-commerce sites, SaaS products, media sites, etc.).
+- Prioritise companies that might benefit from a modern frontend platform (large sites with traffic, not tiny brochure sites).
+- Include a mix of well-known and mid-market companies, not just the top 5 everyone knows.
+- The reason should explain why this company matches the query in one sentence.
+- Do NOT make up companies or URLs. Only include companies you are confident exist with the URL you provide.
+- Do NOT include companies that are primarily API-only or have no public website.`,
+    })
+
+    return NextResponse.json({ companies: object.companies })
+  } catch (error) {
+    console.error("Scout discover failed:", error)
+    const message = error instanceof Error ? error.message : "Discovery failed"
+    return NextResponse.json({ error: message }, { status: 500 })
+  }
+}
diff --git a/app/api/scout/route.ts b/app/api/scout/route.ts
new file mode 100644
index 0000000..92435b3
--- /dev/null
+++ b/app/api/scout/route.ts
@@ -0,0 +1,84 @@
+import { NextRequest, NextResponse } from "next/server"
+import { isValidPublicUrl } from "@/lib/utils"
+import { runScout } from "@/lib/scout/pipeline"
+
+export const maxDuration = 300 // Next.js route segment config: maximum run time for this route, in seconds
+
+/**
+ * POST /api/scout — validates a batch of URLs, runs the Scout pipeline over
+ * them and streams each pipeline event as a server-sent event.
+ * Body: `{ urls: string[], tier3_limit?, skip_vercel?, skip_tier3? }`.
+ * Responds 400 for invalid JSON or an invalid `urls` list.
+ */
+export async function POST(req: NextRequest) {
+  const badRequest = (error: string) =>
+    NextResponse.json({ error }, { status: 400 })
+
+  let body: unknown
+  try {
+    body = await req.json()
+  } catch {
+    return badRequest("Invalid JSON body")
+  }
+
+  // `null` is valid JSON; fall back to {} so the destructuring cannot throw.
+  const fields = (body ?? {}) as Record<string, unknown>
+  const { urls, tier3_limit, skip_vercel, skip_tier3 } = fields
+
+  if (!Array.isArray(urls) || urls.length === 0) {
+    return badRequest("urls must be a non-empty array of strings")
+  }
+  if (urls.length > 50) return badRequest("Maximum 50 URLs per scan")
+  // NOTE(review): only string-ness is checked; `isValidPublicUrl` is imported
+  // but never called — confirm `runScout` rejects private/internal hosts.
+  for (const url of urls) {
+    if (typeof url !== "string" || url.trim().length === 0) {
+      return badRequest(`Invalid URL in list: ${url}`)
+    }
+  }
+
+  // Fall back to the defaults when an option is missing or wrongly typed.
+  const options = {
+    tier3_limit: typeof tier3_limit === "number" ? tier3_limit : 5,
+    skip_vercel: typeof skip_vercel === "boolean" ? skip_vercel : true,
+    skip_tier3: typeof skip_tier3 === "boolean" ? skip_tier3 : false,
+  }
+
+  const encoder = new TextEncoder()
+  // Set on client disconnect: enqueue()/close() throw on a cancelled stream.
+  let cancelled = false
+
+  const stream = new ReadableStream({
+    start(controller) {
+      const send = (event: unknown) => {
+        if (cancelled) return
+        controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`))
+      }
+      ;(async () => {
+        try {
+          for await (const event of runScout(urls, options)) {
+            if (cancelled) break // nobody is listening any more
+            send(event)
+          }
+        } catch (error) {
+          const message =
+            error instanceof Error ? error.message : "An unknown error occurred"
+          send({ stage: "error", data: { message } })
+        } finally {
+          if (!cancelled) controller.close()
+        }
+      })()
+    },
+    cancel() {
+      cancelled = true
+    },
+  })
+
+  return new Response(stream, {
+    headers: {
+      "Content-Type": "text/event-stream",
+      "Cache-Control": "no-cache",
+      Connection: "keep-alive",
+    },
+  })
+}
diff --git a/app/layout.tsx b/app/layout.tsx
index 764301f..f8bdd45 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -25,6 +25,11 @@ export default function RootLayout({
             <Link href="/" className="text-lg font-semibold tracking-tight">
               ◆ Lighthouse
             </Link>
+            <nav className="ml-8 flex items-center gap-6">
+              <Link href="/scout" className="text-sm text-muted-foreground hover:text-foreground transition-colors">
+                Scout
+              </Link>
+            </nav>
           </div>
         </nav>
         <main className="mx-auto max-w-7xl px-6 py-8">
diff --git a/app/page.tsx b/app/page.tsx
index 6436b69..866192c 100644
--- a/app/page.tsx
+++ b/app/page.tsx
@@ -6,6 +6,7 @@ import { UrlInput } from '@/components/url-input'
 import { ProspectCard } from '@/components/prospect-card'
 
 interface ProspectNode {
+  id?: string
   title: string
   body: string
   metadata?: Record<string, any>
@@ -86,7 +87,7 @@ export default function HomePage() {
         ) : (
           <div className="grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3">
             {sorted.map((prospect, idx) => (
-              <ProspectCard key={prospect.title ?? idx} prospect={prospect} />
+              <ProspectCard key={prospect.id ?? `${prospect.title}-${idx}`} prospect={prospect} />
             ))}
           </div>
         )}
diff --git a/app/scout/page.tsx b/app/scout/page.tsx
new file mode 100644
index 0000000..6806e1e
--- /dev/null
+++ b/app/scout/page.tsx
@@ -0,0 +1,165 @@
+"use client"
+
+import { useState, useCallback, useRef } from "react"
+import { ScoutInput } from "@/components/scout-input"
+import { ScoutProgress } from "@/components/scout-progress"
+import { ScoutResultsTable } from "@/components/scout-results-table"
+import type { Tier1Result, Tier2Result } from "@/lib/scout/types"
+
+/**
+ * Scout page: collects URLs through ScoutInput, POSTs them to /api/scout and
+ * updates progress and results as the server-sent events arrive.
+ */
+export default function ScoutPage() {
+  const [running, setRunning] = useState(false)
+  const [tier1Results, setTier1Results] = useState<Tier1Result[]>([])
+  const [tier2Results, setTier2Results] = useState<Tier2Result[]>([])
+  const [tier3Domains, setTier3Domains] = useState<string[]>([])
+  const [inputCount, setInputCount] = useState(0)
+  const [tier2Expected, setTier2Expected] = useState(0)
+  const [tier3Expected, setTier3Expected] = useState(0)
+  const [error, setError] = useState<string | null>(null)
+
+  const abortRef = useRef<AbortController | null>(null)
+
+  const handleStart = useCallback(
+    async (
+      urls: string[],
+      options: { tier3Limit: number; skipVercel: boolean }
+    ) => {
+      setRunning(true)
+      setTier1Results([])
+      setTier2Results([])
+      setTier3Domains([])
+      setInputCount(urls.length)
+      setTier2Expected(0)
+      setTier3Expected(options.tier3Limit)
+      setError(null)
+
+      const controller = new AbortController()
+      abortRef.current = controller
+
+      try {
+        const res = await fetch("/api/scout", {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            urls,
+            tier3_limit: options.tier3Limit,
+            skip_vercel: options.skipVercel,
+            skip_tier3: options.tier3Limit === 0,
+          }),
+          signal: controller.signal,
+        })
+
+        if (!res.ok) {
+          const data = await res.json().catch(() => ({}))
+          setError(data.error ?? `Request failed: ${res.status}`)
+          return
+        }
+
+        const reader = res.body?.getReader()
+        if (!reader) {
+          setError("No response body")
+          return
+        }
+
+        const decoder = new TextDecoder()
+        let buffer = ""
+
+        while (true) {
+          const { done, value } = await reader.read()
+          if (done) break
+
+          buffer += decoder.decode(value, { stream: true })
+
+          const lines = buffer.split("\n")
+          buffer = lines.pop() ?? ""
+
+          for (const line of lines) {
+            if (!line.startsWith("data: ")) continue
+            const json = line.slice(6).trim()
+            if (!json) continue
+
+            try {
+              const event = JSON.parse(json)
+
+              if (event.stage === "tier1") {
+                const t1 = event.data as Tier1Result
+                setTier1Results((prev) => [...prev, t1])
+                if (t1.verdict !== "skip") setTier2Expected((prev) => prev + 1)
+              } else if (event.stage === "tier2") {
+                setTier2Results((prev) => [...prev, event.data as Tier2Result])
+              } else if (event.stage === "tier3") {
+                const msg = (event.data as { message: string }).message
+                if (msg.startsWith("Full analysis complete:")) {
+                  const domain = msg.replace("Full analysis complete: ", "")
+                  setTier3Domains((prev) => [...prev, domain])
+                }
+              } else if (event.stage === "error") {
+                // Show pipeline failures to the user instead of only logging.
+                console.warn("Scout error event:", event.data)
+                setError(event.data?.message ?? "Scout pipeline failed")
+              }
+            } catch {
+              // Ignore malformed events
+            }
+          }
+        }
+      } catch (err) {
+        if (!(err instanceof Error && err.name === "AbortError")) {
+          setError(err instanceof Error ? err.message : "Unknown error")
+        }
+      } finally {
+        setRunning(false)
+        abortRef.current = null
+      }
+    },
+    []
+  )
+
+  return (
+    <div className="mx-auto max-w-6xl px-4 py-12 space-y-8">
+      <div className="space-y-1">
+        <h1 className="text-3xl font-bold tracking-tight">Scout</h1>
+        <p className="text-muted-foreground text-sm">
+          Batch territory qualification. Paste URLs, get a ranked prospect list.
+        </p>
+      </div>
+
+      <ScoutInput onStart={handleStart} disabled={running} />
+
+      {running && (
+        <ScoutProgress
+          tier1={{ done: tier1Results.length, total: inputCount }}
+          tier2={{ done: tier2Results.length, total: tier2Expected }}
+          tier3={{ done: tier3Domains.length, total: tier3Expected }}
+        />
+      )}
+
+      {error && (
+        <div className="rounded-lg border border-red-500/30 bg-red-500/10 px-4 py-3 text-sm text-red-400">
+          {error}
+        </div>
+      )}
+
+      <ScoutResultsTable
+        tier2Results={tier2Results}
+        tier3Domains={tier3Domains}
+        tier1Results={tier1Results}
+      />
+
+      {!running && tier2Results.length === 0 && tier1Results.length === 0 && !error && (
+        <div className="text-center py-12 text-muted-foreground text-sm space-y-2">
+          <p>
+            Paste a list of company URLs to qualify them for Vercel.
+          </p>
+          <p>
+            Scout scans headers, qualifies via AI, and runs full analysis on the
+            top prospects.
+          </p>
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/components/scout-input.tsx b/components/scout-input.tsx
new file mode 100644
index 0000000..5613343
--- /dev/null
+++ b/components/scout-input.tsx
@@ -0,0 +1,140 @@
+"use client"
+
+import { useState } from "react"
+import { Button } from "@/components/ui/button"
+
+interface ScoutInputProps { // props for the ScoutInput form
+  onStart: (urls: string[], options: { tier3Limit: number; skipVercel: boolean }) => void // called with the non-empty URL lines and the chosen options
+  disabled: boolean // when true, the inputs and buttons are disabled
+}
+
+/**
+ * Input form for Scout: a free-text prospect search that appends discovered
+ * URLs to the textarea, plus the scan options and the button that starts a run.
+ */
+export function ScoutInput({ onStart, disabled }: ScoutInputProps) {
+  const [text, setText] = useState("")
+  const [tier3Limit, setTier3Limit] = useState(5)
+  const [includeVercel, setIncludeVercel] = useState(false)
+  const [query, setQuery] = useState("")
+  const [discovering, setDiscovering] = useState(false)
+
+  const urls = text.split("\n").map((l) => l.trim()).filter((l) => l.length > 0)
+  const count = urls.length
+
+  async function handleDiscover() {
+    if (!query.trim() || discovering) return
+    setDiscovering(true)
+
+    try {
+      const res = await fetch("/api/scout/discover", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ query: query.trim() }),
+      })
+
+      if (!res.ok) {
+        const data = await res.json().catch(() => ({}))
+        console.error("Discover failed:", data.error)
+        return
+      }
+
+      const data = await res.json()
+      const discovered = (data.companies ?? [])
+        .map((c: { url: string }) => c.url)
+        .filter((u: string) => u)
+
+      if (discovered.length > 0) {
+        // Append below anything the user already entered.
+        setText((prev) => [prev.trim(), ...discovered].filter(Boolean).join("\n"))
+      }
+    } catch (err) {
+      console.error("Discover error:", err)
+    } finally {
+      setDiscovering(false)
+    }
+  }
+
+  return (
+    <div className="space-y-4">
+      {/* Search bar */}
+      <div className="flex gap-2">
+        <input
+          type="text"
+          className="flex-1 rounded-lg border border-border bg-muted/30 px-4 py-2.5 text-sm placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring"
+          placeholder='Search for prospects, e.g. "UK e-commerce companies" or "fintech startups Germany"'
+          value={query}
+          onChange={(e) => setQuery(e.target.value)}
+          onKeyDown={(e) => {
+            if (e.key === "Enter") handleDiscover()
+          }}
+          disabled={disabled || discovering}
+        />
+        <Button
+          variant="outline"
+          onClick={handleDiscover}
+          disabled={disabled || discovering || !query.trim()}
+          className="px-5 shrink-0"
+        >
+          {discovering ? "Finding..." : "Find prospects"}
+        </Button>
+      </div>
+
+      {/* URL textarea */}
+      <textarea
+        className="w-full min-h-[180px] rounded-lg border border-border bg-muted/30 px-4 py-3 text-sm font-mono placeholder:text-muted-foreground focus:outline-none focus:ring-2 focus:ring-ring resize-y"
+        placeholder="Paste URLs, one per line (max 50) — or use search above to find prospects"
+        value={text}
+        onChange={(e) => setText(e.target.value)}
+        disabled={disabled}
+      />
+
+      <div className="flex flex-wrap items-center justify-between gap-4">
+        <div className="flex flex-wrap items-center gap-4 text-sm">
+          <span className="text-muted-foreground">
+            {count} {count === 1 ? "URL" : "URLs"} entered
+          </span>
+
+          <label className="flex items-center gap-2 text-muted-foreground">
+            Full analysis on top
+            <input
+              type="number"
+              min={0}
+              max={10}
+              value={tier3Limit}
+              onChange={(e) => {
+                // Typed values can bypass min/max or fail to parse; keep 0–10.
+                const n = Math.trunc(Number(e.target.value))
+                setTier3Limit(Number.isFinite(n) ? Math.min(10, Math.max(0, n)) : 0)
+              }}
+              className="w-14 rounded border border-border bg-background px-2 py-1 text-center text-sm"
+              disabled={disabled}
+            />
+            prospects
+          </label>
+
+          <label className="flex items-center gap-2 text-muted-foreground cursor-pointer">
+            <input
+              type="checkbox"
+              checked={includeVercel}
+              onChange={(e) => setIncludeVercel(e.target.checked)}
+              className="rounded"
+              disabled={disabled}
+            />
+            Include sites already on Vercel
+          </label>
+        </div>
+
+        <Button
+          onClick={() =>
+            onStart(urls, { tier3Limit, skipVercel: !includeVercel })
+          }
+          disabled={disabled || count === 0 || count > 50}
+          className="px-6"
+        >
+          {disabled ? "Scanning..." : "Scout"}
+        </Button>
+      </div>
+    </div>
+  )
+}
diff --git a/components/scout-progress.tsx b/components/scout-progress.tsx
new file mode 100644
index 0000000..b706cdc
--- /dev/null
+++ b/components/scout-progress.tsx
@@ -0,0 +1,40 @@
+"use client"
+
+interface ScoutProgressProps { // one done/total counter per scout tier
+  tier1: { done: number; total: number } // shown under the "Scanning headers" label
+  tier2: { done: number; total: number } // shown under the "Qualifying" label
+  tier3: { done: number; total: number } // shown under the "Analysing" label
+}
+
+export function ScoutProgress({ tier1, tier2, tier3 }: ScoutProgressProps) {
+  return ( // one "label: done/total" counter per tier, separated by muted pipes
+    <div className="flex flex-wrap items-center gap-4 text-sm text-muted-foreground">
+      <ProgressItem label="Scanning headers" {...tier1} />
+      <span className="text-border">|</span>
+      <ProgressItem label="Qualifying" {...tier2} />
+      <span className="text-border">|</span>
+      <ProgressItem label="Analysing" {...tier3} />
+    </div>
+  )
+}
+
+/**
+ * Renders one "label: done/total" counter. While work is outstanding the
+ * counter is emphasised and followed by a small pulsing dot.
+ */
+function ProgressItem(props: { label: string; done: number; total: number }) {
+  const { label, done, total } = props
+
+  // In progress: something is expected and not all of it has finished yet.
+  const inProgress = total > 0 && done < total
+  const pulse = inProgress ? (
+    <span className="ml-1 inline-block h-1.5 w-1.5 animate-pulse rounded-full bg-emerald-400" />
+  ) : null
+
+  return (
+    <span className={inProgress ? "text-foreground font-medium" : ""}>
+      {label}: {done}/{total}
+      {pulse}
+    </span>
+  )
+}
diff --git a/components/scout-results-table.tsx b/components/scout-results-table.tsx
new file mode 100644
index 0000000..bd09d8f
--- /dev/null
+++ b/components/scout-results-table.tsx
@@ -0,0 +1,185 @@
+"use client"
+
+import { useState } from "react"
+import Link from "next/link"
+import type { Tier1Result, Tier2Result } from "@/lib/scout/types"
+
+interface ScoutResultsTableProps { // data rendered by ScoutResultsTable
+  tier2Results: Tier2Result[] // rows of the results table (sorted by deal_score for display)
+  tier3Domains: string[] // domains whose row links to /prospects/<domain>
+  tier1Results: Tier1Result[] // entries with verdict "skip" feed the skipped summary
+}
+
+/** Colour-coded pill for a deal score: green from 80, amber from 50, red below. */
+function ScoreBadge({ score }: { score: number }) {
+  const tone =
+    score >= 80
+      ? "bg-emerald-500/20 text-emerald-400"
+      : score >= 50
+        ? "bg-amber-500/20 text-amber-400"
+        : "bg-red-500/20 text-red-400"
+  const base =
+    "inline-flex items-center justify-center rounded-md px-2 py-0.5 text-xs font-semibold tabular-nums"
+
+  return <span className={`${base} ${tone}`}>{score}</span>
+}
+
+/** Picks a row's key signal: self-hosted Next.js, else commerce platform, CMS, or framework evidence (40 chars). */
+function getKeySignal(r: Tier2Result): string {
+  const selfHostedNext = r.framework === "Next.js" && r.hosting !== "Vercel"
+  if (selfHostedNext) return "Self-hosted Next.js"
+  return r.commerce_platform || r.cms || r.framework_evidence.slice(0, 40)
+}
+
+export function ScoutResultsTable({ // ranked results table plus a collapsible summary of skipped domains
+  tier2Results,
+  tier3Domains,
+  tier1Results,
+}: ScoutResultsTableProps) {
+  const [showSkipped, setShowSkipped] = useState(false) // whether the skipped-domain list is expanded
+
+  if (tier2Results.length === 0 && tier1Results.length === 0) return null // nothing to show yet
+
+  // Group the domains of skipped tier-1 results by skip reason ("Other" when none is given)
+  const skipped = tier1Results.filter((r) => r.verdict === "skip")
+  const skipGroups: Record<string, string[]> = {}
+  for (const r of skipped) {
+    const reason = r.skip_reason ?? "Other"
+    if (!skipGroups[reason]) skipGroups[reason] = []
+    skipGroups[reason].push(r.domain)
+  }
+
+  const sorted = [...tier2Results].sort((a, b) => b.deal_score - a.deal_score) // highest deal score first; copied so the prop is not mutated
+
+  return (
+    <div className="space-y-6">
+      {sorted.length > 0 && (
+        <div className="overflow-x-auto rounded-lg border border-border">
+          <table className="w-full text-sm">
+            <thead>
+              <tr className="border-b border-border bg-muted/30">
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground w-8">
+                  #
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground">
+                  Domain
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground w-16">
+                  Score
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground">
+                  Stack
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground">
+                  Hosting
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground hidden lg:table-cell">
+                  Key Signal
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground hidden md:table-cell">
+                  Summary
+                </th>
+                <th className="px-3 py-2 text-left font-medium text-muted-foreground w-32">
+                  Status
+                </th>
+              </tr>
+            </thead>
+            <tbody>
+              {sorted.map((r, idx) => {
+                const hasTier3 = tier3Domains.includes(r.domain) // true when this domain is listed in tier3Domains
+                const stack = [ // detected technologies joined as "a / b / c", empty values dropped
+                  r.framework,
+                  r.commerce_platform,
+                  r.cms,
+                ]
+                  .filter(Boolean)
+                  .join(" / ")
+
+                return (
+                  <tr
+                    key={r.domain}
+                    className="border-b border-border/50 last:border-0 hover:bg-muted/20"
+                  >
+                    <td className="px-3 py-2 text-muted-foreground tabular-nums">
+                      {idx + 1}
+                    </td>
+                    <td className="px-3 py-2 font-medium">
+                      {hasTier3 ? (
+                        <Link
+                          href={`/prospects/${encodeURIComponent(r.domain)}`}
+                          className="text-blue-400 underline underline-offset-2 hover:text-blue-300"
+                        >
+                          {r.domain}
+                        </Link>
+                      ) : (
+                        r.domain
+                      )}
+                    </td>
+                    <td className="px-3 py-2">
+                      <ScoreBadge score={r.deal_score} />
+                    </td>
+                    <td className="px-3 py-2 text-muted-foreground max-w-[180px] truncate">
+                      {stack || "unknown"}
+                    </td>
+                    <td className="px-3 py-2 text-muted-foreground">
+                      {r.hosting}
+                    </td>
+                    <td className="px-3 py-2 text-muted-foreground hidden lg:table-cell max-w-[200px] truncate">
+                      {getKeySignal(r)}
+                    </td>
+                    <td className="px-3 py-2 text-muted-foreground hidden md:table-cell max-w-[300px] truncate">
+                      {r.one_line_summary}
+                    </td>
+                    <td className="px-3 py-2">
+                      {hasTier3 ? (
+                        <Link
+                          href={`/prospects/${encodeURIComponent(r.domain)}`}
+                          className="text-xs text-emerald-400 hover:underline"
+                        >
+                          Full report ready &rarr;
+                        </Link>
+                      ) : r.promote_to_tier3 ? (
+                        <span className="text-xs text-amber-400">
+                          Analysing...
+                        </span>
+                      ) : (
+                        <span className="text-xs text-muted-foreground">
+                          Qualified
+                        </span>
+                      )}
+                    </td>
+                  </tr>
+                )
+              })}
+            </tbody>
+          </table>
+        </div>
+      )}
+
+      {/* Skip summary */}
+      {Object.keys(skipGroups).length > 0 && (
+        <div>
+          <button
+            onClick={() => setShowSkipped(!showSkipped)}
+            className="text-xs text-muted-foreground hover:text-foreground transition-colors"
+          >
+            {showSkipped ? "Hide" : "Show"} skipped ({skipped.length})
+          </button>
+
+          {showSkipped && (
+            <div className="mt-2 space-y-2 text-xs text-muted-foreground">
+              {Object.entries(skipGroups).map(([reason, domains]) => (
+                <div key={reason}>
+                  <span className="font-medium">
+                    {reason} ({domains.length}):
+                  </span>{" "}
+                  {domains.join(", ")}
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/docs/PITCH.md b/docs/PITCH.md
new file mode 100644
index 0000000..3ec2e3f
--- /dev/null
+++ b/docs/PITCH.md
@@ -0,0 +1,150 @@
+# Lighthouse — Pitch Script
+
+## The One-Liner
+
+Lighthouse is an AI-powered deal qualification engine that analyses any website and produces a complete enterprise sales package — tech stack detection, performance audit, deal score, migration strategy, architecture diagrams, and talking points — in under 60 seconds.
+
+---
+
+## Opening (30 seconds)
+
+"Every Vercel enterprise AE spends hours manually researching prospects before a first call. They're tabbing between BuiltWith, PageSpeed Insights, LinkedIn, the prospect's source code, and a dozen internal docs trying to answer one question: *is this deal worth pursuing, and if so, what's the pitch?*
+
+Lighthouse answers that from a single URL."
+
+---
+
+## The Problem (45 seconds)
+
+Enterprise sales teams face three bottlenecks in the qualification stage:
+
+1. **Time sink** — An AE manually researching a prospect takes 2-4 hours. That's time not spent selling. Multiply that across 50 prospects a month and you've lost an entire headcount to research.
+
+2. **Inconsistency** — Every rep qualifies differently. One focuses on tech stack, another on traffic, another on gut feel. There's no standardised framework, which means pipeline reviews are subjective and forecasting is unreliable.
+
+3. **Missed context** — Reps don't have access to the full picture. They miss performance bottlenecks that would make the deal urgent, they miss migration patterns from similar customers, and they miss competitive displacement opportunities hiding in the prospect's infrastructure.
+
+---
+
+## The Solution (60 seconds)
+
+"You paste a URL. Lighthouse does the rest.
+
+It fetches the page, runs it through Google's Gemini 2.5 Pro to detect the full tech stack — framework, hosting, CDN, CMS, commerce platform, analytics, third-party scripts, rendering strategy. Simultaneously, it pulls real performance data from PageSpeed Insights and the Chrome UX Report.
+
+Then it qualifies the prospect. Not with a simple checklist — with a structured analysis that scores Vercel fit, traffic tier, migration complexity, and competitive positioning. It produces a deal score from 0-100 and a recommended action: immediate outreach, schedule discovery, add to nurture, or deprioritise.
+
+Then it goes further. It engineers the value proposition — ROI projections, TCO comparison, migration strategy with step-by-step phases, risk assessment, and case study matching to existing Vercel customers with similar profiles.
+
+Finally, it generates architecture diagrams — current state and target state — and a concrete proof-of-concept proposal with success criteria, timeline, and required resources.
+
+The entire pipeline runs in under 60 seconds. What took an AE half a day is now a URL and a click."
+
+---
+
+## Demo Flow (walk through the app)
+
+### 1. Landing Page
+"This is Lighthouse. Simple interface — paste a URL, hit analyse. Let's try [prospect domain]."
+
+### 2. Pipeline Progress
+"You can see the six-stage pipeline running in real time:
+- **Fetch** — grabs the page HTML and headers
+- **Tech Stack** + **Performance** — run in parallel. Gemini analyses the source while PSI runs a Lighthouse audit
+- **Qualification** — scores the deal based on everything we've found
+- **Value Engineering** — builds the business case
+- **Architecture** — generates diagrams and a PoC proposal
+
+Each stage streams results as they complete. No waiting for the full pipeline."
+
+### 3. Dashboard — Header
+"Here's the result. Deal score of [X] out of 100, recommended action: [action]. The AE knows immediately whether to pursue this."
+
+### 4. Qualification Panel
+"The qualification breaks down into Vercel fit, traffic tier, company profile, and migration signals. It even scrapes careers pages for hiring signals — if they're hiring frontend engineers, that's a buying signal."
+
+### 5. Tech Stack Panel
+"Full tech stack detection: framework, hosting, CDN, rendering strategy, composable maturity score. This tells the AE exactly what they're walking into."
+
+### 6. Performance Panel
+"Real Core Web Vitals — LCP, CLS, INP — plus TTFB, with lab and field data. If the prospect has performance problems, this is the urgency lever. 'Your LCP is 4.2 seconds — that's costing you X% in conversions.'"
+
+### 7. Value Engineering Panel
+"This is where it gets powerful. Revenue impact projections, TCO comparison between their current stack and Vercel, migration roadmap with effort estimates and risk levels, and the closest case study match from Vercel's customer base."
+
+### 8. Architecture Panel
+"Current state and target state architecture diagrams — rendered as Mermaid flowcharts. The AE can drop these straight into a customer deck. Below that, a concrete PoC proposal with scope, timeline, success criteria, and what's needed from the prospect."
+
+### 9. Talking Points Panel
+"Pre-written talking points segmented by audience — VP Engineering, CTO, CFO. Each one is calibrated to the prospect's specific situation, not generic."
+
+---
+
+## Technical Differentiators (30 seconds)
+
+- **Gemini 2.5 Pro** with structured output (Zod schemas) — not free-text generation, but typed, validated objects
+- **Real performance data** — PSI + CrUX, not estimates
+- **Cortex graph memory** — every analysis is stored. The system learns patterns across prospects. "We've seen 15 React-on-AWS migrations — here's what worked."
+- **Streaming SSE pipeline** — results appear as they're ready, not after the full pipeline completes
+- **Built on Next.js + Vercel** — dogfooding the platform we're selling
+
+---
+
+## Cortex Memory Layer (30 seconds)
+
+"Lighthouse doesn't just analyse and forget. Every prospect, every tech stack detection, every migration pattern, every qualification score gets stored in Cortex — our graph memory engine.
+
+Over time, this builds institutional knowledge. When Lighthouse analyses a new React-on-AWS prospect, it can pull migration patterns from every similar prospect we've already analysed. The system gets smarter with every deal.
+
+This also means AEs can come back to a prospect weeks later and the full analysis is still there — no re-running, no lost context."
+
+---
+
+## Business Impact (30 seconds)
+
+| Metric | Before | After |
+|--------|--------|-------|
+| Time to qualify a prospect | 2-4 hours | < 60 seconds |
+| Qualification consistency | Subjective, varies by rep | Standardised 0-100 score |
+| Pipeline coverage | ~30 prospects/month per AE | Unlimited |
+| PoC proposal quality | Manual, generic templates | Auto-generated, prospect-specific |
+| Institutional knowledge | Lost when reps leave | Persisted in Cortex graph |
+
+---
+
+## Competitive Positioning
+
+"There are tools that do parts of this — BuiltWith for tech detection, Similarweb for traffic, 6sense for intent signals. But none of them:
+
+1. Combine all signals into a single qualified view
+2. Generate the actual sales materials (talking points, architecture diagrams, PoC proposals)
+3. Build a learning memory layer across all prospects
+4. Do it in under 60 seconds from a single URL
+
+Lighthouse isn't a research tool. It's a deal acceleration engine."
+
+---
+
+## Ask / Next Steps
+
+**For internal pitch (Vercel sales leadership):**
+"We'd like to pilot Lighthouse with 3 AEs for 30 days. Success criteria: reduction in qualification time, improvement in pipeline accuracy, and AE satisfaction scores. If the pilot works, we roll it out to the full enterprise team."
+
+**For external pitch (potential customers/partners):**
+"Lighthouse is built on the Vercel platform — it's proof of what you can build with Next.js, the Vercel AI SDK, and edge computing. We'd love to walk through your specific use case and show you what the analysis looks like for your site."
+
+---
+
+## Objection Handling
+
+**"How accurate is the AI?"**
+"The tech stack detection uses Gemini 2.5 Pro analysing actual page source code and HTTP headers — not a database lookup. Performance data comes directly from Google's own PageSpeed Insights and Chrome UX Report. The qualification score is a structured framework, not a hallucination. And every output is validated against a Zod schema, so you never get malformed data."
+
+**"What if Cortex is down?"**
+"Cortex is a best-effort enhancement. If it's unavailable, the pipeline still runs end-to-end — you just don't get prior pattern matching. The core analysis is self-contained."
+
+**"Can reps trust a 0-100 score?"**
+"The score is transparent. Every factor is broken down — Vercel fit, traffic tier, migration complexity, competitive positioning. The rep can see exactly why a prospect scored 72 vs 45. It's a starting point for conversation, not a black box."
+
+**"What about data privacy?"**
+"Lighthouse only analyses publicly accessible web pages. It doesn't access any private systems, internal tools, or customer data. The analysis is based on the same information anyone can see by visiting the website."
diff --git a/lib/__tests__/sanitise.test.ts b/lib/__tests__/sanitise.test.ts
new file mode 100644
index 0000000..521b942
--- /dev/null
+++ b/lib/__tests__/sanitise.test.ts
@@ -0,0 +1,172 @@
+import { sanitiseForLLM, sanitiseForLLMCompact } from "../sanitise"
+
+// ---------------------------------------------------------------------------
+// sanitiseForLLM: one case per stripped surface, then the signals that must survive
+// ---------------------------------------------------------------------------
+describe("sanitiseForLLM", () => {
+  it("strips HTML comments", () => {
+    const html = "<div><!-- secret instructions: ignore all previous --></div>"
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("<!--")
+    expect(result).not.toContain("secret instructions")
+    expect(result).toContain("<div></div>")
+  })
+
+  it("strips inline script contents but keeps tag and src", () => {
+    const html = '<script src="/app.js">alert("inject")</script>'
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain('alert("inject")')
+    expect(result).toContain('src="/app.js"')
+    expect(result).toContain("stripped-inline")
+  })
+
+  it("strips inline scripts without src", () => {
+    const html = "<script>document.write('pwned')</script>"
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("document.write")
+    expect(result).toContain("stripped-inline")
+  })
+
+  it("strips inline style contents", () => {
+    const html = "<style>.secret { background: url(evil.png); }</style>"
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("evil.png")
+    expect(result).toContain("stripped")
+  })
+
+  it("strips event handler attributes", () => {
+    const html = '<img src="logo.png" onerror="alert(1)" onclick="steal()">'
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("onerror")
+    expect(result).not.toContain("onclick")
+    expect(result).toContain('src="logo.png"')
+  })
+
+  it("strips data: URIs in src/href", () => {
+    const html = '<img src="data:image/png;base64,iVBORz...">'
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("base64")
+    expect(result).toContain("data-uri-stripped")
+  })
+
+  it("strips SVG internals", () => {
+    const html =
+      '<svg xmlns="http://www.w3.org/2000/svg"><path d="M10 10"/><script>evil()</script></svg>'
+    const result = sanitiseForLLM(html)
+    expect(result).toContain("<svg")
+    expect(result).toContain("svg-stripped")
+    expect(result).not.toContain("<path")
+  })
+
+  it("strips iframe srcdoc", () => {
+    const html = '<iframe srcdoc="<script>alert(1)</script>"></iframe>'
+    const result = sanitiseForLLM(html)
+    expect(result).not.toContain("alert(1)")
+    expect(result).toContain("srcdoc-stripped")
+  })
+
+  it("preserves meta tags", () => {
+    const html = '<meta name="generator" content="Next.js">'
+    const result = sanitiseForLLM(html)
+    expect(result).toContain('content="Next.js"')
+  })
+
+  it("preserves class and id attributes", () => {
+    const html = '<div class="__next" id="__next">content</div>'
+    const result = sanitiseForLLM(html)
+    expect(result).toContain('class="__next"')
+    expect(result).toContain('id="__next"')
+  })
+
+  it("preserves data-* attributes on non-script elements", () => {
+    const html = '<div data-reactroot="" data-page="/home">app</div>'
+    const result = sanitiseForLLM(html)
+    expect(result).toContain("data-reactroot")
+    expect(result).toContain("data-page")
+  })
+
+  it("preserves src/href on link/script tags for detection", () => {
+    const html =
+      '<link rel="preload" href="/_next/static/chunks/main.js"><script src="/_next/static/runtime.js">code</script>'
+    const result = sanitiseForLLM(html)
+    expect(result).toContain("/_next/static/chunks/main.js")
+    expect(result).toContain("/_next/static/runtime.js")
+  })
+
+  it("truncates to maxLength after stripping", () => {
+    const html = "<div>" + "x".repeat(100_000) + "</div>"
+    const result = sanitiseForLLM(html, 1000)
+    expect(result.length).toBeLessThanOrEqual(1000)
+  })
+
+  it("handles empty input", () => {
+    expect(sanitiseForLLM("")).toBe("")
+  })
+
+  it("handles multiple script tags", () => {
+    const html = [
+      '<script src="/_next/a.js">code1</script>',
+      "<script>code2</script>",
+      '<script type="application/json">{"key":"val"}</script>',
+    ].join("")
+    const result = sanitiseForLLM(html)
+    expect(result).toContain("/_next/a.js")
+    // Bodies of the first two scripts must be gone (the JSON body is not asserted here)
+    expect(result).not.toContain("code1")
+    expect(result).not.toContain("code2")
+  })
+})
+
+// ---------------------------------------------------------------------------
+// sanitiseForLLMCompact: attribute keep-list, whitespace collapsing, 30k default cap
+// ---------------------------------------------------------------------------
+describe("sanitiseForLLMCompact", () => {
+  it("strips non-essential attributes", () => {
+    const html =
+      '<div class="main" style="color:red" tabindex="0" aria-label="test" data-testid="x">text</div>'
+    const result = sanitiseForLLMCompact(html)
+    expect(result).toContain('class="main"')
+    expect(result).toContain('data-testid="x"')
+    // style, tabindex, aria-label should be stripped
+    expect(result).not.toContain("color:red")
+    expect(result).not.toContain("tabindex")
+    expect(result).not.toContain("aria-label")
+  })
+
+  it("keeps src, href, content, rel, name, id attributes", () => {
+    const html = [
+      '<link rel="stylesheet" href="/style.css">',
+      '<meta name="description" content="A site">',
+      '<script src="/app.js">code</script>',
+      '<div id="root" type="custom">app</div>',
+    ].join("")
+    const result = sanitiseForLLMCompact(html)
+    expect(result).toContain('rel="stylesheet"')
+    expect(result).toContain('href="/style.css"')
+    expect(result).toContain('name="description"')
+    expect(result).toContain('content="A site"')
+    expect(result).toContain('id="root"')
+    // type is kept on non-script elements
+    expect(result).toContain('type="custom"')
+  })
+
+  it("collapses whitespace", () => {
+    const html = "<div>   lots    of     space   </div>"
+    const result = sanitiseForLLMCompact(html)
+    expect(result).not.toContain("   ")
+  })
+
+  it("defaults to 30000 char limit", () => {
+    const html = "<div>" + "a".repeat(50_000) + "</div>"
+    const result = sanitiseForLLMCompact(html)
+    expect(result.length).toBeLessThanOrEqual(30_000)
+  })
+
+  it("also applies standard sanitisation (scripts, comments, etc.)", () => {
+    const html = "<!-- comment --><script>evil()</script><div>ok</div>"
+    const result = sanitiseForLLMCompact(html)
+    expect(result).not.toContain("comment")
+    expect(result).not.toContain("evil()")
+    expect(result).toContain("<div>ok</div>")
+  })
+})
diff --git a/lib/gemini/detect-tech-stack.ts b/lib/gemini/detect-tech-stack.ts
index 45187c6..5f0b0c2 100644
--- a/lib/gemini/detect-tech-stack.ts
+++ b/lib/gemini/detect-tech-stack.ts
@@ -1,6 +1,7 @@
 import { generateObject } from "ai"
 import { google } from "@ai-sdk/google"
 import { TechStackSchema, type TechStack } from "@/lib/schemas"
+import { sanitiseForLLM } from "@/lib/sanitise"
 
 /**
  * detectTechStack
@@ -17,7 +18,7 @@ export async function detectTechStack(
   domain: string,
 ): Promise<TechStack> {
   try {
-    const truncatedHtml = html.slice(0, 60_000)
+    const truncatedHtml = sanitiseForLLM(html)
 
     const { object } = await generateObject({
       model: google("gemini-2.5-pro"),
diff --git a/lib/gemini/qualify-prospect.ts b/lib/gemini/qualify-prospect.ts
index c8d1630..084586b 100644
--- a/lib/gemini/qualify-prospect.ts
+++ b/lib/gemini/qualify-prospect.ts
@@ -2,6 +2,7 @@ import { generateObject } from "ai"
 import { google } from "@ai-sdk/google"
 import { QualificationSchema, type TechStack, type Qualification } from "@/lib/schemas"
 import type { PerformanceMetrics } from "@/lib/pagespeed"
+import { sanitiseForLLM } from "@/lib/sanitise"
 
 // ---------------------------------------------------------------------------
 // Careers-page scraper (best-effort)
@@ -42,7 +43,7 @@ async function scrapeCareersPage(domain: string): Promise<string> {
       const html = await response.text()
       if (html.trim().length === 0) continue
 
-      return html.slice(0, 20_000)
+      return sanitiseForLLM(html, 20_000)
     } catch {
       // Timeout, network error, etc. -- move to next URL.
       continue
diff --git a/lib/sanitise.ts b/lib/sanitise.ts
new file mode 100644
index 0000000..b99c330
--- /dev/null
+++ b/lib/sanitise.ts
@@ -0,0 +1,100 @@
+/**
+ * HTML sanitisation for LLM safety.
+ *
+ * Raw HTML from unknown sites may contain prompt injection payloads in
+ * script/style bodies, comments, event handlers, data: URIs or srcdoc.
+ * Stripping these before LLM calls narrows the surface through which an
+ * analysed page can smuggle instructions to the model.
+ */
+
+const EVENT_HANDLER_RE = /\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi
+// An unterminated comment is stripped through to the end of the input.
+const HTML_COMMENT_RE = /<!--[\s\S]*?(?:-->|$)/g
+// End tags may carry whitespace before ">" (e.g. "</script >"), hence \b[^>]*.
+const INLINE_SCRIPT_RE = /(<script\b[^>]*?)>([\s\S]*?)(<\/script\b[^>]*>)/gi
+const INLINE_STYLE_RE = /(<style\b[^>]*?)>([\s\S]*?)(<\/style\b[^>]*>)/gi
+const SVG_INTERNALS_RE = /(<svg\b[^>]*?>)([\s\S]*?)(<\/svg\b[^>]*>)/gi
+// Each quoted form runs to its own closing quote; a data: URI may embed the other kind.
+const DATA_URI_RE = /((?:src|href)\s*=\s*)(?:"\s*data:[^"]*"|'\s*data:[^']*')/gi
+const IFRAME_SRCDOC_RE = /(\ssrcdoc\s*=\s*)(?:"[^"]*"|'[^']*'|[^\s>]+)/gi
+
+/**
+ * Sanitise HTML for LLM consumption — standard mode.
+ *
+ * Keeps structural signals (tags, src/href, meta, class, data-* attributes).
+ * Strips injection surfaces (inline scripts, event handlers, comments, data URIs).
+ *
+ * @param html  Raw HTML string
+ * @param maxLength  Maximum output length after stripping (default 60000)
+ * @returns Sanitised HTML, truncated to at most maxLength characters
+ */
+export function sanitiseForLLM(html: string, maxLength = 60_000): string {
+  let out = html
+
+  // Strip HTML comments (common injection vector)
+  out = out.replace(HTML_COMMENT_RE, "")
+
+  // Strip inline script contents but keep tag + src attribute
+  out = out.replace(INLINE_SCRIPT_RE, (_match, open, _contents, close) => {
+    // Preserve src attribute if present
+    const srcMatch = open.match(/\ssrc\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/i)
+    const src = srcMatch ? ` ${srcMatch[0].trim()}` : ""
+    return `<script${src} stripped-inline>${close}`
+  })
+
+  // Strip inline style contents
+  out = out.replace(INLINE_STYLE_RE, (_match, open, _contents, close) => {
+    return `${open} stripped>${close}`
+  })
+
+  // Strip SVG internals (keep the tag)
+  out = out.replace(SVG_INTERNALS_RE, (_match, open, _internals, close) => {
+    return `${open}[svg-stripped]${close}`
+  })
+
+  // Strip event handler attributes
+  out = out.replace(EVENT_HANDLER_RE, "")
+
+  // Replace data: URIs in src/href; the placeholder is always double-quoted
+  out = out.replace(DATA_URI_RE, "$1\"[data-uri-stripped]\"")
+
+  // Strip iframe srcdoc attributes (quoted or unquoted)
+  out = out.replace(IFRAME_SRCDOC_RE, "$1\"[srcdoc-stripped]\"")
+
+  // Truncate after stripping so more useful content fits
+  return out.length > maxLength ? out.slice(0, maxLength) : out
+}
+
+/**
+ * Compact sanitisation — more aggressive stripping for Tier 2 speed.
+ *
+ * Same as sanitiseForLLM plus: drops every `name=value` attribute except
+ * src, href, content, class, id, data-*, type, rel, name (matched
+ * case-insensitively; valueless attributes are untouched), then collapses
+ * whitespace runs.
+ *
+ * @param html  Raw HTML string
+ * @param maxLength  Maximum output length (default 30000)
+ * @returns Sanitised, attribute-pruned HTML of at most maxLength characters
+ */
+export function sanitiseForLLMCompact(html: string, maxLength = 30_000): string {
+  // Standard pass first; truncation is deferred until the compact pass is done
+  let out = sanitiseForLLM(html, Infinity)
+
+  // Attribute names are case-insensitive in HTML, so the data-* exemption
+  // lives in the same /i pattern as the rest of the keep-list.
+  const KEEP_ATTRS = /^(?:src|href|content|class|id|type|rel|name|data-[\w-]*)$/i
+  const ATTR_RE = /\s([a-z][\w-]*)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+))/gi
+
+  // NOTE(review): ATTR_RE is not tag-aware, so `word = value` in text nodes is pruned too.
+  out = out.replace(ATTR_RE, (match, attrName: string) =>
+    KEEP_ATTRS.test(attrName) ? match : ""
+  )
+
+  // Collapse whitespace runs to save tokens
+  out = out.replace(/\s{2,}/g, " ")
+
+  if (out.length > maxLength) {
+    out = out.slice(0, maxLength)
+  }
+  return out
+}
diff --git a/lib/scout/__tests__/pipeline.test.ts b/lib/scout/__tests__/pipeline.test.ts
new file mode 100644
index 0000000..92f6f09
--- /dev/null
+++ b/lib/scout/__tests__/pipeline.test.ts
@@ -0,0 +1,354 @@
+import { vi, beforeEach, afterEach } from "vitest"
+import type { ScoutStreamEvent } from "../types"
+
+// ---------------------------------------------------------------------------
+// Mock dependencies: tier1/tier2/cortex forward to vi.fn spies; the rest return canned values
+// ---------------------------------------------------------------------------
+
+// Mock tier1: scanTier1Batch forwards every call to mockScanTier1Batch
+const mockScanTier1Batch = vi.fn()
+vi.mock("../tier1", () => ({
+  scanTier1Batch: (...args: any[]) => mockScanTier1Batch(...args),
+}))
+
+// Mock tier2: qualifyTier2 forwards every call to mockQualifyTier2
+const mockQualifyTier2 = vi.fn()
+vi.mock("../tier2", () => ({
+  qualifyTier2: (...args: any[]) => mockQualifyTier2(...args),
+}))
+
+// Mock cortex: search/store are spies; cortexSearchPriorPatterns always resolves to ""
+const mockCortexSearch = vi.fn()
+const mockCortexStore = vi.fn()
+vi.mock("@/lib/cortex", () => ({
+  cortexSearch: (...args: any[]) => mockCortexSearch(...args),
+  cortexStore: (...args: any[]) => mockCortexStore(...args),
+  cortexSearchPriorPatterns: () => Promise.resolve(""),
+}))
+
+// Mock store pipeline: storeInCortex resolves immediately without storing anything
+vi.mock("@/lib/cortex-store-pipeline", () => ({
+  storeInCortex: () => Promise.resolve(),
+}))
+
+// Mock fetcher + pagespeed + gemini (for tier3)
+vi.mock("@/lib/fetcher", () => ({
+  fetchPage: () =>
+    Promise.resolve({ html: "<html></html>", headers: {} }),
+}))
+vi.mock("@/lib/pagespeed", () => ({
+  getPerformanceMetrics: () =>
+    Promise.resolve({ performance_score: 80, lcp_ms: 2000, ttfb_ms: 300 }),
+}))
+vi.mock("@/lib/gemini", () => ({
+  detectTechStack: () =>
+    Promise.resolve({
+      frontend_framework: { name: "Next.js", is_nextjs: true },
+      hosting: { name: "AWS", is_vercel: false },
+    }),
+  qualifyProspect: () =>
+    Promise.resolve({ deal_score: 75, recommended_action: "schedule-discovery-call" }),
+  engineerValue: () =>
+    Promise.resolve({ migration: { approach: "incremental-migration" } }),
+  designArchitecture: () =>
+    Promise.resolve({ poc_proposal: { title: "PoC" } }),
+}))
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Tier 1 fixture for `domain`; "skip" gives an unreachable result with no HTML (is_vercel only if the domain contains "vercel"). */
+function makeTier1Result(domain: string, verdict: "promote" | "skip" | "maybe" = "promote") {
+  const skipped = verdict === "skip"
+  return {
+    url: `https://${domain}`,
+    domain,
+    reachable: !skipped,
+    status_code: skipped ? null : 200,
+    response_time_ms: 300,
+    is_vercel: skipped && domain.includes("vercel"),
+    is_nextjs: verdict === "promote",
+    is_react: true,
+    js_framework_signal: "/_next/",
+    server_header: "nginx",
+    cdn_signal: null, cdn_evidence: null,
+    html_size_bytes: 10000,
+    verdict,
+    skip_reason: skipped ? "Already on Vercel" : null,
+    priority_boost: false, confidence: "high" as const,
+    raw_html: skipped ? null : "<html>content</html>",
+    raw_headers: {},
+  }
+}
+
+/** Builds a Tier 2 qualification fixture for `domain` carrying the given deal score. */
+function makeTier2Result(domain: string, score: number, promote = true) {
+  const one_line_summary = `${domain} summary`
+  const executive_paragraph = `${domain} executive paragraph`
+  return {
+    url: `https://${domain}`, domain,
+    framework: "Next.js",
+    framework_confidence: "high" as const,
+    framework_evidence: "/_next/ paths",
+    hosting: "AWS",
+    hosting_confidence: "high" as const,
+    cdn: "CloudFront",
+    commerce_platform: null, cms: null,
+    composable_maturity: "headless" as const,
+    industry_vertical: "SaaS",
+    estimated_size: "mid-market" as const,
+    deal_score: score,
+    one_line_summary, executive_paragraph,
+    promote_to_tier3: promote,
+    rationale: "Good candidate",
+  }
+}
+
+/** Exposes an array as an async generator so a mock can stand in for a streaming source. */
+async function* fakeAsyncGenerator<T>(items: T[]): AsyncGenerator<T> {
+  // Delegation yields each element in array order, one per next() call.
+  yield* items
+}
+
+/** Drains `gen` and resolves with every event it produced, in emission order. */
+async function collectEvents(gen: AsyncGenerator<ScoutStreamEvent>): Promise<ScoutStreamEvent[]> {
+  const collected: ScoutStreamEvent[] = []
+  // Pull results one at a time; the loop stops at the first `done` result.
+  for (let step = await gen.next(); !step.done; step = await gen.next()) collected.push(step.value)
+  return collected
+}
+
+// ---------------------------------------------------------------------------
+// Tests: runScout is imported inside each test and its event stream drained with collectEvents
+// ---------------------------------------------------------------------------
+describe("runScout", () => {
+  beforeEach(() => {
+    mockScanTier1Batch.mockReset()
+    mockQualifyTier2.mockReset()
+    mockCortexSearch.mockReset()
+    mockCortexStore.mockReset()
+
+    // Default: Cortex search finds nothing and every store call resolves
+    mockCortexSearch.mockResolvedValue({ results: [] })
+    mockCortexStore.mockResolvedValue({ id: "test" })
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it("runs tier1 and tier2, yielding events for each", async () => {
+    const tier1Results = [
+      makeTier1Result("a.com"),
+      makeTier1Result("b.com"),
+      makeTier1Result("vercel-site.com", "skip"),
+    ]
+    mockScanTier1Batch.mockReturnValue(fakeAsyncGenerator(tier1Results))
+
+    mockQualifyTier2
+      .mockResolvedValueOnce(makeTier2Result("a.com", 75))
+      .mockResolvedValueOnce(makeTier2Result("b.com", 45))
+
+    // Import dynamically after mocks are set up
+    const { runScout } = await import("../pipeline")
+    const events = await collectEvents(
+      runScout(["https://a.com", "https://b.com", "https://vercel-site.com"], {
+        skip_tier3: true,
+      })
+    )
+
+    const tier1Events = events.filter((e) => e.stage === "tier1")
+    const tier2Events = events.filter((e) => e.stage === "tier2")
+    const completeEvents = events.filter((e) => e.stage === "complete")
+
+    // 2 promoted + 1 skipped = 3 tier1 events (skipped ones still emit tier1)
+    expect(tier1Events).toHaveLength(3)
+    // 2 promoted go to tier2
+    expect(tier2Events).toHaveLength(2)
+    // 1 complete event
+    expect(completeEvents).toHaveLength(1)
+
+    const summary = (completeEvents[0].data as any).summary
+    expect(summary.total).toBe(3)
+    expect(summary.skipped_vercel).toBe(1)
+    expect(summary.promoted_to_tier2).toBe(2)
+  })
+
+  it("deduplicates URLs by domain", async () => {
+    mockScanTier1Batch.mockImplementation((urls: string[]) => {
+      return fakeAsyncGenerator(urls.map((u) => makeTier1Result(u.replace("https://", ""))))
+    })
+    mockQualifyTier2.mockResolvedValue(makeTier2Result("example.com", 60))
+
+    const { runScout } = await import("../pipeline")
+    const events = await collectEvents(
+      runScout(
+        ["https://example.com", "https://example.com", "example.com"],
+        { skip_tier3: true }
+      )
+    )
+
+    // The batch scanner must have been handed the https://example.com entry
+    expect(mockScanTier1Batch).toHaveBeenCalledWith(
+      expect.arrayContaining(["https://example.com"])
+    )
+    const callArg = mockScanTier1Batch.mock.calls[0][0] as string[]
+    // Ideally one entry survives; the assertion tolerates two (see trailing note)
+    expect(callArg.length).toBeLessThanOrEqual(2) // "https://example.com" and possibly "example.com" (same domain)
+  })
+
+  it("sorts tier2 results by deal score descending in final result", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([
+        makeTier1Result("low.com"),
+        makeTier1Result("high.com"),
+        makeTier1Result("mid.com"),
+      ])
+    )
+
+    mockQualifyTier2
+      .mockResolvedValueOnce(makeTier2Result("low.com", 20))
+      .mockResolvedValueOnce(makeTier2Result("high.com", 90))
+      .mockResolvedValueOnce(makeTier2Result("mid.com", 55))
+
+    const { runScout } = await import("../pipeline")
+    const events = await collectEvents(
+      runScout(["https://low.com", "https://high.com", "https://mid.com"], {
+        skip_tier3: true,
+      })
+    )
+
+    const complete = events.find((e) => e.stage === "complete")
+    const results = (complete!.data as any).tier2_results
+    expect(results[0].deal_score).toBe(90)
+    expect(results[1].deal_score).toBe(55)
+    expect(results[2].deal_score).toBe(20)
+  })
+
+  it("stores high-score prospects in Cortex", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([makeTier1Result("hot.com")])
+    )
+    mockQualifyTier2.mockResolvedValueOnce(
+      makeTier2Result("hot.com", 85)
+    )
+
+    const { runScout } = await import("../pipeline")
+    await collectEvents(
+      runScout(["https://hot.com"], { skip_tier3: true })
+    )
+
+    // A score of 85 must yield a scout-prospect node (cutoff presumably 50 — confirm in pipeline)
+    expect(mockCortexStore).toHaveBeenCalledWith(
+      expect.objectContaining({
+        kind: "scout-prospect",
+        source_agent: "lighthouse-scout",
+      })
+    )
+  })
+
+  it("does not store low-score prospects in Cortex", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([makeTier1Result("cold.com")])
+    )
+    mockQualifyTier2.mockResolvedValueOnce(
+      makeTier2Result("cold.com", 30)
+    )
+
+    const { runScout } = await import("../pipeline")
+    await collectEvents(
+      runScout(["https://cold.com"], { skip_tier3: true })
+    )
+
+    // A score of 30 must not yield any scout-prospect store call
+    const scoutCalls = mockCortexStore.mock.calls.filter(
+      (call: any[]) => call[0]?.kind === "scout-prospect"
+    )
+    expect(scoutCalls).toHaveLength(0)
+  })
+
+  it("stores scan summary in Cortex on completion", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([makeTier1Result("site.com")])
+    )
+    mockQualifyTier2.mockResolvedValueOnce(
+      makeTier2Result("site.com", 60)
+    )
+
+    const { runScout } = await import("../pipeline")
+    await collectEvents(
+      runScout(["https://site.com"], { skip_tier3: true })
+    )
+
+    // Exactly one scout-scan summary, tagged "scout" and "territory-scan"
+    const scanCalls = mockCortexStore.mock.calls.filter(
+      (call: any[]) => call[0]?.kind === "scout-scan"
+    )
+    expect(scanCalls).toHaveLength(1)
+    expect(scanCalls[0][0].tags).toContain("scout")
+    expect(scanCalls[0][0].tags).toContain("territory-scan")
+  })
+
+  it("handles tier2 Gemini failures gracefully", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([
+        makeTier1Result("ok.com"),
+        makeTier1Result("fail.com"),
+      ])
+    )
+
+    mockQualifyTier2
+      .mockResolvedValueOnce(makeTier2Result("ok.com", 70))
+      .mockRejectedValueOnce(new Error("Gemini rate limited"))
+
+    const { runScout } = await import("../pipeline")
+    const events = await collectEvents(
+      runScout(["https://ok.com", "https://fail.com"], { skip_tier3: true })
+    )
+
+    const errorEvents = events.filter((e) => e.stage === "error")
+    const tier2Events = events.filter((e) => e.stage === "tier2")
+
+    // One success, one error
+    expect(tier2Events).toHaveLength(1)
+    expect(errorEvents).toHaveLength(1)
+    expect((errorEvents[0].data as any).message).toContain("fail.com")
+
+    // Pipeline should still complete
+    const complete = events.find((e) => e.stage === "complete")
+    expect(complete).toBeDefined()
+  })
+
+  it("emits complete event with correct summary shape", async () => {
+    mockScanTier1Batch.mockReturnValue(
+      fakeAsyncGenerator([makeTier1Result("test.com")])
+    )
+    mockQualifyTier2.mockResolvedValueOnce(
+      makeTier2Result("test.com", 50)
+    )
+
+    const { runScout } = await import("../pipeline")
+    const events = await collectEvents(
+      runScout(["https://test.com"], { skip_tier3: true })
+    )
+
+    const complete = events.find((e) => e.stage === "complete")
+    expect(complete).toBeDefined()
+    const data = complete!.data as any
+
+    expect(data.scan_id).toBeDefined()
+    expect(data.started_at).toBeDefined()
+    expect(data.completed_at).toBeDefined()
+    expect(data.input_count).toBe(1)
+    expect(data.tier1_results).toHaveLength(1)
+    expect(data.tier2_results).toHaveLength(1)
+    expect(data.summary).toMatchObject({
+      total: 1,
+      promoted_to_tier2: 1,
+      promoted_to_tier3: 0,
+      skipped_vercel: 0,
+      skipped_unreachable: 0,
+    })
+  })
+})
diff --git a/lib/scout/__tests__/tier1.test.ts b/lib/scout/__tests__/tier1.test.ts
new file mode 100644
index 0000000..97e130e
--- /dev/null
+++ b/lib/scout/__tests__/tier1.test.ts
@@ -0,0 +1,351 @@
+import { vi, beforeEach, afterEach } from "vitest"
+import { scanTier1, scanTier1Batch } from "../tier1"
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Minimal fetch Response stand-in: status, ok, text(), and a headers.forEach that reports lower-cased keys. */
+function mockFetchResponse(
+  html: string,
+  headers: Record<string, string> = {},
+  status = 200
+) {
+  const ok = status >= 200 && status < 300
+  const headerBag = {
+    forEach: (cb: (value: string, key: string) => void) => {
+      Object.entries(headers).forEach(([name, value]) => cb(value, name.toLowerCase()))
+    },
+  }
+  return {
+    status, ok,
+    text: () => Promise.resolve(html),
+    headers: headerBag,
+  }
+}
+
+// ---------------------------------------------------------------------------
+// scanTier1: global fetch is stubbed per test; each case feeds one canned response
+// ---------------------------------------------------------------------------
+describe("scanTier1", () => {
+  let mockFetch: ReturnType<typeof vi.fn>
+
+  beforeEach(() => {
+    mockFetch = vi.fn()
+    vi.stubGlobal("fetch", mockFetch)
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it("detects Next.js from /_next/ in HTML", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse(
+        '<html><script src="/_next/static/chunks/main.js"></script></html>',
+        { server: "nginx" }
+      )
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.is_nextjs).toBe(true)
+    expect(result.js_framework_signal).toBe("/_next/")
+    expect(result.verdict).toBe("promote")
+    expect(result.reachable).toBe(true)
+  })
+
+  it("detects Next.js from __NEXT_DATA__ with high confidence", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse(
+        '<html><script id="__NEXT_DATA__" type="application/json">{}</script></html>'
+      )
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.is_nextjs).toBe(true)
+    expect(result.confidence).toBe("high")
+    expect(result.js_framework_signal).toBe("__NEXT_DATA__")
+  })
+
+  it("detects Vercel and returns skip verdict", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>", {
+        "x-vercel-id": "iad1::abc123",
+        "x-vercel-cache": "HIT",
+      })
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.is_vercel).toBe(true)
+    expect(result.verdict).toBe("skip")
+    expect(result.skip_reason).toBe("Already on Vercel")
+    expect(result.raw_html).toBeNull()
+  })
+
+  it("detects React from data-reactroot", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse('<html><div data-reactroot="">app</div></html>')
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.is_react).toBe(true)
+    expect(result.verdict).toBe("promote")
+    expect(result.confidence).toBe("medium")
+  })
+
+  it("detects Nuxt from /__nuxt/ paths", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse(
+        '<html><script src="/__nuxt/entry.js"></script></html>'
+      )
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.js_framework_signal).toBe("/__nuxt/")
+    expect(result.verdict).toBe("promote")
+  })
+
+  it("detects Angular from ng-version", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse('<html><app-root ng-version="16.2.0"></app-root></html>')
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.js_framework_signal).toBe("angular")
+    expect(result.verdict).toBe("promote")
+  })
+
+  it("returns maybe for pages with no JS framework signals", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html><h1>Hello World</h1></html>")
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.verdict).toBe("maybe")
+    expect(result.confidence).toBe("low")
+    expect(result.is_nextjs).toBe(false)
+    expect(result.is_react).toBe(false)
+  })
+
+  it("returns skip for unreachable URLs", async () => {
+    mockFetch.mockRejectedValueOnce(new TypeError("fetch failed"))
+
+    const result = await scanTier1("https://unreachable.test")
+
+    expect(result.reachable).toBe(false)
+    expect(result.verdict).toBe("skip")
+    expect(result.skip_reason).toBe("Unreachable")
+  })
+
+  it("returns skip for invalid URLs", async () => {
+    const result = await scanTier1("not a url at all !!!")
+
+    expect(result.verdict).toBe("skip")
+    expect(result.skip_reason).toContain("Invalid")
+  })
+
+  it("detects Cloudflare CDN from cf-ray header", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>", {
+        "cf-ray": "abc123-IAD",
+      })
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.cdn_signal).toBe("cloudflare")
+    expect(result.cdn_evidence).toBe("cf-ray header")
+  })
+
+  it("detects CloudFront CDN from x-amz-cf-id header", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>", {
+        "x-amz-cf-id": "something",
+      })
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.cdn_signal).toBe("cloudfront")
+  })
+
+  it("detects Fastly CDN from x-served-by header", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>", {
+        "x-served-by": "cache-iad-kcgs7200077",
+      })
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.cdn_signal).toBe("fastly")
+  })
+
+  it("sets priority_boost when response time exceeds 500ms", async () => {
+    // Simulate a slow response: the fetch promise itself resolves only after 600ms
+    mockFetch.mockImplementationOnce(
+      () =>
+        new Promise((resolve) =>
+          setTimeout(
+            () =>
+              resolve(
+                mockFetchResponse("<html></html>")
+              ),
+            600
+          )
+        )
+    )
+
+    const result = await scanTier1("https://slow.test")
+
+    // NOTE(review): presumably response_time_ms spans fetch + text() — confirm in tier1
+    expect(result.reachable).toBe(true)
+    // May or may not hit 500ms threshold depending on timing,
+    // but the field should exist
+    expect(typeof result.priority_boost).toBe("boolean")
+  })
+
+  it("normalises URLs without protocol", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>")
+    )
+
+    const result = await scanTier1("example.com")
+
+    expect(result.url).toBe("https://example.com")
+    expect(mockFetch).toHaveBeenCalledWith(
+      "https://example.com",
+      expect.any(Object)
+    )
+  })
+
+  it("stores raw_html for promoted results", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse(
+        '<html><script src="/_next/static/main.js"></script></html>'
+      )
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.verdict).toBe("promote")
+    expect(result.raw_html).not.toBeNull()
+    expect(result.raw_html!.length).toBeGreaterThan(0)
+  })
+
+  it("stores raw_html for maybe results", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html><p>plain page</p></html>")
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.verdict).toBe("maybe")
+    expect(result.raw_html).not.toBeNull()
+  })
+
+  it("records html_size_bytes", async () => {
+    const html = "<html>" + "x".repeat(5000) + "</html>"
+    mockFetch.mockResolvedValueOnce(mockFetchResponse(html))
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.html_size_bytes).toBe(html.length)
+  })
+
+  it("captures server header", async () => {
+    mockFetch.mockResolvedValueOnce(
+      mockFetchResponse("<html></html>", { server: "Apache/2.4" })
+    )
+
+    const result = await scanTier1("https://example.com")
+
+    expect(result.server_header).toBe("Apache/2.4")
+  })
+})
+
+// ---------------------------------------------------------------------------
+// scanTier1Batch: result count and order, per-URL failure isolation, concurrency cap
+// ---------------------------------------------------------------------------
+describe("scanTier1Batch", () => {
+  let mockFetch: ReturnType<typeof vi.fn>
+
+  beforeEach(() => {
+    mockFetch = vi.fn()
+    vi.stubGlobal("fetch", mockFetch)
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it("yields results for all URLs", async () => {
+    mockFetch.mockResolvedValue(
+      mockFetchResponse("<html></html>")
+    )
+
+    const urls = [
+      "https://a.com",
+      "https://b.com",
+      "https://c.com",
+    ]
+
+    const results: any[] = []
+    for await (const result of scanTier1Batch(urls)) {
+      results.push(result)
+    }
+
+    expect(results).toHaveLength(3)
+    expect(results.every((r) => r.reachable)).toBe(true)
+  })
+
+  it("handles mixed success and failure in a batch", async () => {
+    mockFetch
+      .mockResolvedValueOnce(mockFetchResponse("<html></html>"))
+      .mockRejectedValueOnce(new TypeError("fetch failed"))
+      .mockResolvedValueOnce(mockFetchResponse("<html></html>"))
+
+    const results: any[] = []
+    for await (const result of scanTier1Batch([
+      "https://a.com",
+      "https://b.com",
+      "https://c.com",
+    ])) {
+      results.push(result)
+    }
+
+    expect(results).toHaveLength(3)
+    expect(results[0].reachable).toBe(true)
+    expect(results[1].reachable).toBe(false)
+    expect(results[2].reachable).toBe(true)
+  })
+
+  it("respects concurrency limit of 5", async () => {
+    let concurrentCalls = 0
+    let maxConcurrent = 0
+
+    mockFetch.mockImplementation(async () => {
+      concurrentCalls++
+      maxConcurrent = Math.max(maxConcurrent, concurrentCalls)
+      await new Promise((r) => setTimeout(r, 10))
+      concurrentCalls--
+      return mockFetchResponse("<html></html>")
+    })
+
+    const urls = Array.from({ length: 12 }, (_, i) => `https://site${i}.com`)
+    const results: any[] = []
+    for await (const result of scanTier1Batch(urls)) {
+      results.push(result)
+    }
+
+    expect(results).toHaveLength(12)
+    expect(maxConcurrent).toBeLessThanOrEqual(5)
+  })
+})
diff --git a/lib/scout/__tests__/tier2.test.ts b/lib/scout/__tests__/tier2.test.ts
new file mode 100644
index 0000000..65d1dbd
--- /dev/null
+++ b/lib/scout/__tests__/tier2.test.ts
@@ -0,0 +1,196 @@
+import { vi, beforeEach, afterEach } from "vitest"
+import { qualifyTier2 } from "../tier2"
+import type { Tier1Result } from "../types"
+
+// ---------------------------------------------------------------------------
+// Mock the AI SDK: generateObject forwards to a vi.fn(); google() returns a { modelId } stub
+// ---------------------------------------------------------------------------
+const mockGenerateObject = vi.fn()
+vi.mock("ai", () => ({
+  generateObject: (...args: any[]) => mockGenerateObject(...args), // looks up the spy at call time
+}))
+
+vi.mock("@ai-sdk/google", () => ({
+  google: (model: string) => ({ modelId: model }), // lets tests read back the requested model name
+}))
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Fixture: a reachable, promoted Next.js/CloudFront Tier1Result; `overrides` replaces any field.
+ */
+function makeTier1(overrides: Partial<Tier1Result> = {}): Tier1Result {
+  const defaults: Tier1Result = {
+    url: "https://example.com",
+    domain: "example.com",
+    reachable: true,
+    status_code: 200,
+    response_time_ms: 350,
+    is_vercel: false,
+    is_nextjs: true,
+    is_react: true,
+    js_framework_signal: "/_next/",
+    server_header: "nginx",
+    cdn_signal: "cloudfront",
+    cdn_evidence: "x-amz-cf-id header",
+    html_size_bytes: 45000,
+    verdict: "promote",
+    skip_reason: null,
+    priority_boost: false,
+    confidence: "high",
+    raw_html: '<html><script src="/_next/static/main.js" stripped-inline></script></html>',
+    raw_headers: { server: "nginx", "x-amz-cf-id": "abc123" },
+  }
+  return { ...defaults, ...overrides }
+}
+
+const VALID_GEMINI_RESPONSE = { // canned `object` payload that the mocked generateObject resolves with
+  framework: "Next.js",
+  framework_version: "14",
+  framework_confidence: "high" as const,
+  framework_evidence: "/_next/static/chunks/ paths found in HTML",
+  is_nextjs: true,
+  nextjs_self_hosted: true,
+  uses_app_router: true,
+  hosting: "AWS",
+  hosting_confidence: "high" as const,
+  hosting_evidence: "x-amz-cf-id header indicates CloudFront/AWS",
+  cdn: "CloudFront",
+  commerce_platform: null,
+  commerce_evidence: null,
+  cms: "Contentful",
+  cms_evidence: "Contentful SDK script detected",
+  other_integrations: ["Segment", "Sentry"],
+  composable_maturity: "headless" as const,
+  industry_vertical: "SaaS",
+  estimated_size: "mid-market" as const,
+  b2b_or_b2c: "B2B" as const,
+  deal_score: 82,
+  one_line_summary:
+    "Next.js 14 self-hosted on AWS with Contentful CMS. 350ms response time. Strong Vercel fit.",
+  executive_paragraph:
+    "Mid-market B2B SaaS company running self-hosted Next.js 14 on AWS with CloudFront CDN and Contentful CMS. The headless architecture and App Router adoption indicate modern frontend practices. Migration to Vercel would reduce infrastructure management overhead and improve TTFB through edge delivery. Recommend scheduling a discovery call.",
+  promote_to_tier3: true,
+  promotion_rationale: // the tests below expect this value on the result's `rationale` field
+    "Self-hosted Next.js with high deal score warrants full analysis.",
+}
+
+// ---------------------------------------------------------------------------
+// qualifyTier2
+// ---------------------------------------------------------------------------
+describe("qualifyTier2", () => {
+  beforeEach(() => {
+    mockGenerateObject.mockReset() // drop recorded calls and queued results from the previous test
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it("returns a Tier2Result from a successful Gemini call", async () => {
+    mockGenerateObject.mockResolvedValueOnce({ object: VALID_GEMINI_RESPONSE })
+
+    const result = await qualifyTier2(makeTier1())
+
+    expect(result.domain).toBe("example.com")
+    expect(result.url).toBe("https://example.com")
+    expect(result.framework).toBe("Next.js")
+    expect(result.deal_score).toBe(82)
+    expect(result.promote_to_tier3).toBe(true)
+    expect(result.hosting).toBe("AWS")
+    expect(result.cms).toBe("Contentful")
+    expect(result.composable_maturity).toBe("headless")
+  })
+
+  it("returns degraded result when raw_html is null", async () => {
+    const result = await qualifyTier2(makeTier1({ raw_html: null }))
+
+    expect(result.deal_score).toBe(0)
+    expect(result.promote_to_tier3).toBe(false)
+    expect(result.one_line_summary).toContain("No HTML content")
+    // Should not call Gemini
+    expect(mockGenerateObject).not.toHaveBeenCalled()
+  })
+
+  it("returns degraded result when raw_html is empty", async () => {
+    const result = await qualifyTier2(makeTier1({ raw_html: "  " })) // whitespace-only counts as empty
+
+    expect(result.deal_score).toBe(0)
+    expect(result.promote_to_tier3).toBe(false)
+    expect(mockGenerateObject).not.toHaveBeenCalled()
+  })
+
+  it("returns degraded result when Gemini throws", async () => {
+    mockGenerateObject.mockRejectedValueOnce(
+      new Error("Rate limit exceeded")
+    )
+
+    const result = await qualifyTier2(makeTier1())
+
+    expect(result.deal_score).toBe(0)
+    expect(result.promote_to_tier3).toBe(false)
+    expect(result.one_line_summary).toContain("Gemini qualification failed")
+    expect(result.domain).toBe("example.com")
+  })
+
+  it("passes correct prompt data to Gemini", async () => {
+    mockGenerateObject.mockResolvedValueOnce({ object: VALID_GEMINI_RESPONSE })
+
+    await qualifyTier2(
+      makeTier1({
+        domain: "testsite.io",
+        response_time_ms: 1200,
+        is_nextjs: true,
+        is_react: false,
+        cdn_signal: "cloudflare",
+        server_header: "Apache",
+      })
+    )
+
+    expect(mockGenerateObject).toHaveBeenCalledTimes(1)
+    const callArgs = mockGenerateObject.mock.calls[0][0]
+    const prompt = callArgs.prompt as string
+
+    expect(prompt).toContain("testsite.io")
+    expect(prompt).toContain("1200ms")
+    expect(prompt).toContain("Next.js detected: true")
+    expect(prompt).toContain("React detected: false")
+    expect(prompt).toContain("CDN: cloudflare")
+    expect(prompt).toContain("Server: Apache")
+  })
+
+  it("uses the gemini flash model", async () => {
+    mockGenerateObject.mockResolvedValueOnce({ object: VALID_GEMINI_RESPONSE })
+
+    await qualifyTier2(makeTier1())
+
+    const callArgs = mockGenerateObject.mock.calls[0][0]
+    expect(callArgs.model.modelId).toMatch(/gemini.*flash/) // the title promises "flash", so require it
+  })
+
+  it("maps promotion_rationale to rationale field", async () => {
+    mockGenerateObject.mockResolvedValueOnce({
+      object: {
+        ...VALID_GEMINI_RESPONSE,
+        promotion_rationale: "Strong self-hosted Next.js candidate",
+      },
+    })
+
+    const result = await qualifyTier2(makeTier1())
+
+    expect(result.rationale).toBe("Strong self-hosted Next.js candidate")
+  })
+
+  it("preserves url and domain from tier1 input", async () => {
+    mockGenerateObject.mockResolvedValueOnce({ object: VALID_GEMINI_RESPONSE })
+
+    const result = await qualifyTier2(
+      makeTier1({ url: "https://custom.io/path", domain: "custom.io" })
+    )
+
+    expect(result.url).toBe("https://custom.io/path")
+    expect(result.domain).toBe("custom.io")
+  })
+})
diff --git a/lib/scout/pipeline.ts b/lib/scout/pipeline.ts
new file mode 100644
index 0000000..4f9bf5a
--- /dev/null
+++ b/lib/scout/pipeline.ts
@@ -0,0 +1,286 @@
+import { scanTier1Batch } from "./tier1"
+import { qualifyTier2 } from "./tier2"
+import { extractDomain } from "@/lib/utils"
+import { cortexSearch, cortexStore } from "@/lib/cortex"
+import { entityTag } from "@/lib/utils"
+import { storeInCortex } from "@/lib/cortex-store-pipeline"
+import { fetchPage } from "@/lib/fetcher"
+import { getPerformanceMetrics } from "@/lib/pagespeed"
+import {
+  detectTechStack,
+  qualifyProspect,
+  engineerValue,
+  designArchitecture,
+} from "@/lib/gemini"
+import { cortexSearchPriorPatterns } from "@/lib/cortex"
+import type { Tier1Result, Tier2Result, ScoutResult, ScoutStreamEvent } from "./types"
+
+interface ScoutOptions {
+  tier3_limit?: number // max Tier 3 analyses per run; runScout defaults to 5, a value <= 0 disables Tier 3
+  skip_vercel?: boolean // runScout defaults this to true but never consults the value afterwards
+  skip_tier3?: boolean // true disables Tier 3 entirely; runScout defaults to false
+}
+
+/**
+ * Run the Scout funnel: dedupe by domain, look up prior Cortex analyses, Tier 1 scan, Tier 2 Gemini
+ * qualification, optional Tier 3 full analysis. Yields one event per step, then a final "complete".
+ */
+export async function* runScout(
+  urls: string[],
+  options?: ScoutOptions
+): AsyncGenerator<ScoutStreamEvent> {
+  const tier3Limit = options?.tier3_limit ?? 5
+  const skipVercel = options?.skip_vercel ?? true // NOTE(review): not referenced again in this function
+  const skipTier3 = options?.skip_tier3 ?? false
+
+  const scanId = crypto.randomUUID()
+  const startedAt = new Date().toISOString()
+
+  // Deduplicate by domain; the first URL seen for a domain is kept exactly as it was passed in
+  const seen = new Set<string>()
+  const dedupedUrls: string[] = []
+  for (const url of urls) {
+    let domain: string
+    try {
+      const normalised = /^https?:\/\//i.test(url.trim())
+        ? url.trim()
+        : `https://${url.trim()}`
+      domain = extractDomain(normalised)
+    } catch {
+      domain = url.trim() // extractDomain threw: use the raw string as the dedupe key
+    }
+    if (!seen.has(domain)) {
+      seen.add(domain)
+      dedupedUrls.push(url)
+    }
+  }
+
+  const tier1Results: Tier1Result[] = []
+  const tier2Results: Tier2Result[] = []
+  const tier3Domains: string[] = []
+
+  // Summary counters
+  let skippedVercel = 0
+  let skippedUnreachable = 0
+  let skippedNoJs = 0
+  let skippedOther = 0
+
+  // --- Check Cortex for previously analysed domains (domain -> stored deal_score) ---
+  const previouslyAnalysed = new Map<string, number>()
+  try {
+    for (const domain of seen) {
+      const searchResult = await cortexSearch(domain, 5)
+      const prospectNode = searchResult.results.find(
+        (n: any) =>
+          (n.kind === "Prospect" || n.kind === "prospect") &&
+          n.tags?.includes(entityTag(domain))
+      )
+      if (prospectNode) {
+        try {
+          const body = JSON.parse(prospectNode.body)
+          if (body?.deal_score != null) {
+            previouslyAnalysed.set(domain, body.deal_score)
+          }
+        } catch {
+          // ignore parse failure
+        }
+      }
+    }
+  } catch {
+    // Cortex may be unavailable — continue without (the first failed search ends the whole lookup loop)
+  }
+
+  // --- TIER 1: Header scan ---
+  const toTier2: Tier1Result[] = []
+
+  for await (const result of scanTier1Batch(dedupedUrls)) {
+    tier1Results.push(result)
+
+    // Previously analysed: no tier1 event is emitted and Tier 2 is not run for this domain
+    if (previouslyAnalysed.has(result.domain)) {
+      const priorScore = previouslyAnalysed.get(result.domain)!
+      // Yield as tier2 with prior data
+      const priorResult: Tier2Result = {
+        url: result.url,
+        domain: result.domain,
+        framework: result.js_framework_signal ?? "unknown",
+        framework_confidence: "medium",
+        framework_evidence: "Previously analysed in Lighthouse",
+        hosting: "unknown",
+        hosting_confidence: "low",
+        cdn: result.cdn_signal ?? "unknown",
+        commerce_platform: null,
+        cms: null,
+        composable_maturity: "monolithic",
+        industry_vertical: "unknown",
+        estimated_size: "unknown",
+        deal_score: priorScore,
+        one_line_summary: `Previously analysed, score: ${priorScore}/100`,
+        executive_paragraph: `This domain was previously analysed in Lighthouse with a deal score of ${priorScore}/100. View the full report for details.`,
+        promote_to_tier3: false,
+        rationale: "Already has full Lighthouse analysis",
+      }
+      tier2Results.push(priorResult)
+      yield { stage: "tier2", url: result.url, data: priorResult }
+      continue
+    }
+
+    yield { stage: "tier1", url: result.url, data: result }
+
+    if (result.verdict === "skip") {
+      if (result.skip_reason === "Already on Vercel") skippedVercel++
+      else if (result.skip_reason === "Unreachable") skippedUnreachable++
+      else skippedOther++
+    } else {
+      // promote and maybe both go to Tier 2
+      toTier2.push(result)
+    }
+  }
+
+  // --- TIER 2: Quick Gemini qualification ---
+  const TIER2_CONCURRENCY = 3
+
+  for (let i = 0; i < toTier2.length; i += TIER2_CONCURRENCY) {
+    const batch = toTier2.slice(i, i + TIER2_CONCURRENCY)
+    const results = await Promise.allSettled(batch.map((t1) => qualifyTier2(t1)))
+
+    for (let j = 0; j < results.length; j++) {
+      const result = results[j]
+      const t1 = batch[j]
+
+      if (result.status === "fulfilled") {
+        tier2Results.push(result.value)
+        yield { stage: "tier2", url: t1.url, data: result.value }
+
+        // Store qualified prospects in Cortex (score >= 50)
+        if (result.value.deal_score >= 50 && !previouslyAnalysed.has(result.value.domain)) {
+          try {
+            await cortexStore({
+              kind: "scout-prospect",
+              title: `${result.value.domain}: Score ${result.value.deal_score}, ${result.value.one_line_summary}`,
+              body: JSON.stringify(result.value),
+              importance: result.value.deal_score / 100,
+              tags: [
+                entityTag(result.value.domain),
+                "scout",
+                result.value.industry_vertical,
+                result.value.composable_maturity,
+              ],
+              source_agent: "lighthouse-scout",
+            })
+          } catch {
+            // Non-fatal: Cortex storage failure shouldn't break the pipeline
+          }
+        }
+      } else {
+        // Gemini failed for this URL
+        yield {
+          stage: "error",
+          url: t1.url,
+          data: { message: `Tier 2 failed for ${t1.domain}: ${result.reason}` },
+        }
+      }
+    }
+  }
+
+  // Sort by deal score descending (in place, so the final tier2_results is in this order)
+  tier2Results.sort((a, b) => b.deal_score - a.deal_score)
+
+  // --- TIER 3: Full Lighthouse analysis (optional) ---
+  if (!skipTier3 && tier3Limit > 0) {
+    const candidates = tier2Results
+      .filter((r) => r.promote_to_tier3 && !previouslyAnalysed.has(r.domain))
+      .slice(0, tier3Limit)
+
+    for (const candidate of candidates) {
+      try {
+        yield { stage: "tier3", url: candidate.url, data: { message: `Starting full analysis: ${candidate.domain}` } }
+
+        // Run the existing Lighthouse pipeline directly
+        const { html, headers } = await fetchPage(candidate.url)
+        const [techStack, perf] = await Promise.all([
+          detectTechStack(html, headers, candidate.domain),
+          getPerformanceMetrics(candidate.url),
+        ])
+        const qualification = await qualifyProspect(candidate.domain, techStack, perf)
+        const priorPatterns = await cortexSearchPriorPatterns(techStack)
+        const valueEngineering = await engineerValue(
+          candidate.domain,
+          techStack,
+          perf,
+          qualification,
+          priorPatterns
+        )
+        const architecture = await designArchitecture(
+          candidate.domain,
+          techStack,
+          perf,
+          valueEngineering
+        )
+
+        // Store in Cortex
+        await storeInCortex(candidate.domain, candidate.url, {
+          techStack,
+          performance: perf,
+          qualification,
+          valueEngineering,
+          architecture,
+        })
+
+        tier3Domains.push(candidate.domain)
+        yield { stage: "tier3", url: candidate.url, data: { message: `Full analysis complete: ${candidate.domain}` } }
+      } catch (error) {
+        const msg = error instanceof Error ? error.message : String(error)
+        yield {
+          stage: "error",
+          url: candidate.url,
+          data: { message: `Tier 3 failed for ${candidate.domain}: ${msg}` },
+        }
+      }
+    }
+  }
+
+  // Count Tier 1 "maybe" verdicts that ended up with no Tier 2 result as skipped_no_js
+  for (const t1 of tier1Results) {
+    if (t1.verdict === "maybe" && !tier2Results.some((t2) => t2.domain === t1.domain)) {
+      skippedNoJs++
+    }
+  }
+
+  // --- COMPLETE ---
+  const completedAt = new Date().toISOString()
+  const scoutResult: ScoutResult = {
+    scan_id: scanId,
+    started_at: startedAt,
+    completed_at: completedAt,
+    input_count: dedupedUrls.length,
+    tier1_results: tier1Results,
+    tier2_results: tier2Results,
+    tier3_domains: tier3Domains,
+    summary: {
+      total: dedupedUrls.length,
+      promoted_to_tier2: toTier2.length,
+      promoted_to_tier3: tier3Domains.length,
+      skipped_vercel: skippedVercel,
+      skipped_unreachable: skippedUnreachable,
+      skipped_no_js: skippedNoJs,
+      skipped_other: skippedOther,
+    },
+  }
+
+  // Store scan summary in Cortex
+  try {
+    await cortexStore({
+      kind: "scout-scan",
+      title: `Scout scan: ${dedupedUrls.length} URLs, ${tier2Results.length} qualified, top: ${tier2Results.slice(0, 3).map((r) => r.domain).join(", ")}`,
+      body: JSON.stringify(scoutResult.summary),
+      importance: 0.6,
+      tags: ["scout", "territory-scan"],
+      source_agent: "lighthouse-scout",
+    })
+  } catch {
+    // Non-fatal
+  }
+
+  yield { stage: "complete", data: scoutResult }
+}
diff --git a/lib/scout/tier1.ts b/lib/scout/tier1.ts
new file mode 100644
index 0000000..27be058
--- /dev/null
+++ b/lib/scout/tier1.ts
@@ -0,0 +1,224 @@
+import { isValidPublicUrl, extractDomain } from "@/lib/utils"
+import { sanitiseForLLMCompact } from "@/lib/sanitise"
+import type { Tier1Result } from "./types"
+
+/**
+ * Trim the input, prefix "https://" when it has no http(s) scheme
+ * (an explicit http:// is kept as is), then drop every trailing slash.
+ */
+function normaliseUrl(raw: string): string {
+  const trimmed = raw.trim()
+  const hasScheme = /^https?:\/\//i.test(trimmed)
+  const withScheme = hasScheme ? trimmed : `https://${trimmed}`
+  return withScheme.replace(/\/+$/, "")
+}
+
+/**
+ * Tier 1 scan — no LLM: one GET of the target URL (8s timeout) plus header/HTML signal checks.
+ * Invalid URLs, fetch failures and unreadable bodies return a verdict "skip" result, not an error.
+ */
+export async function scanTier1(rawUrl: string): Promise<Tier1Result> {
+  const url = normaliseUrl(rawUrl)
+
+  let domain: string
+  try {
+    domain = extractDomain(url)
+  } catch {
+    return skipResult(url, rawUrl, "Invalid URL")
+  }
+
+  if (!isValidPublicUrl(url)) {
+    return skipResult(url, domain, "Invalid or non-public URL")
+  }
+
+  const start = performance.now()
+  let response: Response
+  try {
+    response = await fetch(url, {
+      signal: AbortSignal.timeout(8_000),
+      headers: {
+        "User-Agent": "Mozilla/5.0 (compatible; Lighthouse-Scout/1.0)",
+        Accept: "text/html",
+      },
+      redirect: "follow", // NOTE(review): only the initial URL went through isValidPublicUrl, not redirect targets
+    })
+  } catch {
+    return skipResult(url, domain, "Unreachable")
+  }
+  const responseTime = Math.round(performance.now() - start)
+
+  const headers: Record<string, string> = {}
+  response.headers.forEach((v, k) => {
+    headers[k.toLowerCase()] = v
+  })
+
+  let html: string
+  try {
+    html = await response.text()
+  } catch {
+    return skipResult(url, domain, "Failed to read response body")
+  }
+
+  const htmlSlice = html.slice(0, 100_000) // all HTML checks below only see the first 100,000 characters
+
+  // --- Header signals ---
+  const isVercel = !!headers["x-vercel-id"]
+  const serverHeader = headers["server"] ?? null
+
+  // CDN detection
+  let cdnSignal: string | null = null
+  let cdnEvidence: string | null = null
+  if (headers["cf-ray"]) {
+    cdnSignal = "cloudflare"
+    cdnEvidence = "cf-ray header"
+  } else if (headers["x-amz-cf-id"] || headers["x-amz-cf-pop"]) {
+    cdnSignal = "cloudfront"
+    cdnEvidence = headers["x-amz-cf-id"] ? "x-amz-cf-id header" : "x-amz-cf-pop header"
+  } else if (headers["x-served-by"]?.includes("cache-")) {
+    cdnSignal = "fastly"
+    cdnEvidence = "x-served-by header contains cache-"
+  } else if (Object.keys(headers).some((h) => h.startsWith("x-akamai"))) {
+    cdnSignal = "akamai"
+    cdnEvidence = "x-akamai-* header"
+  } else if (headers["x-vercel-cache"]) {
+    cdnSignal = "vercel"
+    cdnEvidence = "x-vercel-cache header"
+  }
+
+  // --- HTML signals ---
+  let isNextjs = false
+  let isReact = false
+  let frameworkSignal: string | null = null
+
+  if (htmlSlice.includes("/_next/")) {
+    isNextjs = true
+    frameworkSignal = "/_next/"
+  }
+  if (htmlSlice.includes("__NEXT_DATA__")) {
+    isNextjs = true
+    frameworkSignal = "__NEXT_DATA__"
+  }
+  if (htmlSlice.includes("data-reactroot") || htmlSlice.includes("__REACT_DEVTOOLS")) {
+    isReact = true
+    if (!frameworkSignal) frameworkSignal = "data-reactroot"
+  }
+  if (!frameworkSignal && /\/__?nuxt\//.test(htmlSlice)) {
+    frameworkSignal = htmlSlice.includes("/_nuxt/") ? "/_nuxt/" : "/__nuxt/" // Nuxt's default asset dir is /_nuxt/
+  }
+  if (
+    !frameworkSignal &&
+    (htmlSlice.includes("/static/js/main") || htmlSlice.includes("/static/js/bundle"))
+  ) {
+    isReact = true
+    frameworkSignal = "/static/js/main"
+  }
+  if (!frameworkSignal && (htmlSlice.includes("ng-version") || htmlSlice.includes("ng-app"))) {
+    frameworkSignal = "angular"
+  }
+
+  // --- Verdict ---
+  let verdict: "promote" | "skip" | "maybe"
+  let skipReason: string | null = null
+
+  if (isVercel) {
+    verdict = "skip"
+    skipReason = "Already on Vercel"
+  } else if (isNextjs) {
+    verdict = "promote"
+  } else if (isReact) {
+    verdict = "promote"
+  } else if (frameworkSignal) {
+    verdict = "promote"
+  } else {
+    verdict = "maybe"
+  }
+
+  // --- Confidence ---
+  let confidence: "high" | "medium" | "low"
+  if (isNextjs && htmlSlice.includes("__NEXT_DATA__")) {
+    confidence = "high"
+  } else if (isNextjs) {
+    confidence = "medium"
+  } else if (isReact) {
+    confidence = "medium"
+  } else {
+    confidence = "low"
+  }
+
+  // --- Priority boost: set when fetch() took longer than 500ms ---
+  const priorityBoost = responseTime > 500
+
+  // --- Store raw HTML for Tier 2 (only if not skipped) ---
+  const rawHtml =
+    verdict !== "skip" ? sanitiseForLLMCompact(htmlSlice) : null
+
+  return {
+    url,
+    domain,
+    reachable: true,
+    status_code: response.status,
+    response_time_ms: responseTime,
+    is_vercel: isVercel,
+    is_nextjs: isNextjs,
+    is_react: isReact,
+    js_framework_signal: frameworkSignal,
+    server_header: serverHeader,
+    cdn_signal: cdnSignal,
+    cdn_evidence: cdnEvidence,
+    html_size_bytes: html.length, // string length of the full body (UTF-16 code units), not a byte count
+    verdict,
+    skip_reason: skipReason,
+    priority_boost: priorityBoost,
+    confidence,
+    raw_html: rawHtml,
+    raw_headers: verdict !== "skip" ? headers : {},
+  }
+}
+
+function skipResult(url: string, domain: string, reason: string): Tier1Result {
+  return { // placeholder for a URL with no analysable response: unreachable, no signals, verdict "skip"
+    url,
+    domain,
+    reachable: false,
+    status_code: null,
+    response_time_ms: null,
+    is_vercel: false,
+    is_nextjs: false,
+    is_react: false,
+    js_framework_signal: null,
+    server_header: null,
+    cdn_signal: null,
+    cdn_evidence: null,
+    html_size_bytes: null,
+    verdict: "skip",
+    skip_reason: reason, // passed through unchanged
+    priority_boost: false,
+    confidence: "low",
+    raw_html: null,
+    raw_headers: {},
+  }
+}
+
+/**
+ * Run Tier 1 scans in consecutive batches of at most 5 concurrent requests.
+ * Each batch's results are yielded in input order once the whole batch has settled.
+ */
+export async function* scanTier1Batch(
+  urls: string[]
+): AsyncGenerator<Tier1Result> {
+  const CONCURRENCY = 5
+
+  for (let i = 0; i < urls.length; i += CONCURRENCY) {
+    const batch = urls.slice(i, i + CONCURRENCY)
+    const results = await Promise.allSettled(batch.map((u) => scanTier1(u)))
+
+    for (const [j, result] of results.entries()) {
+      if (result.status === "fulfilled") {
+        yield result.value
+      } else {
+        // scanTier1 is not expected to reject; if it does, report against the URL that failed
+        yield skipResult(batch[j], batch[j], `Unexpected error: ${result.reason}`)
+      }
+    }
+  }
+}
diff --git a/lib/scout/tier2-schema.ts b/lib/scout/tier2-schema.ts
new file mode 100644
index 0000000..e75200b
--- /dev/null
+++ b/lib/scout/tier2-schema.ts
@@ -0,0 +1,121 @@
+import { z } from "zod"
+
+/**
+ * Zod schema for the single structured Gemini response used by Tier 2 quick qualification.
+ * Flat fields where possible — no nested objects unless necessary.
+ * The .describe() texts are runtime strings (presumably surfaced to the model as field hints — confirm).
+ */
+export const Tier2Schema = z.object({
+  framework: z
+    .string()
+    .describe(
+      "Primary frontend framework: Next.js, React, Vue, Nuxt, Angular, Astro, Svelte, static HTML, WordPress, unknown"
+    ),
+  framework_version: z.string().optional(),
+  framework_confidence: z.enum(["high", "medium", "low"]),
+  framework_evidence: z
+    .string()
+    .describe(
+      'Exact signal: "/_next/static/chunks/ paths found", "__NEXT_DATA__ script tag present", etc.'
+    ),
+
+  is_nextjs: z.boolean(),
+  nextjs_self_hosted: z
+    .boolean()
+    .optional()
+    .describe("True if Next.js but NOT on Vercel"),
+  uses_app_router: z.boolean().optional(),
+
+  hosting: z
+    .string()
+    .describe(
+      "AWS, GCP, Azure, Netlify, Cloudflare, Vercel, self-hosted, unknown"
+    ),
+  hosting_confidence: z.enum(["high", "medium", "low"]),
+  hosting_evidence: z.string(),
+
+  cdn: z
+    .string()
+    .describe(
+      "CloudFront, Cloudflare, Fastly, Akamai, Vercel Edge, none detected"
+    ),
+
+  commerce_platform: z
+    .string()
+    .nullable()
+    .describe(
+      "BigCommerce, Shopify, Salesforce Commerce Cloud, commercetools, Medusa, Saleor, none detected. Be specific about headless vs monolithic usage."
+    ),
+  commerce_evidence: z.string().nullable(),
+
+  cms: z
+    .string()
+    .nullable()
+    .describe(
+      "Contentful, Sanity, Storyblok, Prismic, WordPress, Builder.io, DatoCMS, none detected"
+    ),
+  cms_evidence: z.string().nullable(),
+
+  other_integrations: z
+    .array(z.string())
+    .describe(
+      "Other detected: Algolia, Stripe, Auth0, Segment, LaunchDarkly, Sentry, etc. Just names, no detail needed."
+    ),
+
+  composable_maturity: z.enum([
+    "monolithic",
+    "partially-decoupled",
+    "headless",
+    "fully-composable",
+  ]),
+
+  industry_vertical: z
+    .string()
+    .describe(
+      "e-commerce, SaaS, fintech, media, healthcare, education, government, agency, other"
+    ),
+  estimated_size: z.enum([
+    "startup",
+    "scaleup",
+    "mid-market",
+    "enterprise",
+    "unknown",
+  ]),
+  b2b_or_b2c: z.enum(["B2B", "B2C", "both", "unknown"]),
+
+  deal_score: z
+    .number()
+    .min(0)
+    .max(100)
+    .describe(
+      "Vercel deal attractiveness. 80+ = strong fit, 50-79 = worth investigating, <50 = deprioritise. " +
+        "Score high: self-hosted Next.js + poor performance + enterprise traffic + composable architecture. " +
+        "Score low: already on Vercel, non-JS framework, tiny static site, monolithic CMS with no decoupling intent."
+    ),
+
+  one_line_summary: z
+    .string()
+    .describe(
+      'One sentence. Format: "[Framework] on [hosting], [commerce/CMS if relevant]. [Key metric or signal]. [Fit assessment]." ' +
+        'Example: "Next.js 14 self-hosted on AWS, headless Shopify + Contentful. 920ms TTFB. Strong Vercel fit." ' +
+        'No hedging. No "appears to be" or "seems like". State what was detected and at what confidence.'
+    ),
+
+  executive_paragraph: z
+    .string()
+    .describe(
+      "One paragraph for an SE manager to read in 30 seconds. Structure: " +
+        "Who they are (vertical, estimated size). What they run (stack). " +
+        "Why they are a prospect (the specific problem Vercel solves). " +
+        "What the next move is (outreach, discovery call, add to nurture, skip). " +
+        "Reference specific data points. No marketing language. " +
+        "Sound like a senior SA who looked at the site, not a template."
+    ),
+
+  promote_to_tier3: z
+    .boolean()
+    .describe("Should this get a full Lighthouse analysis?"),
+  promotion_rationale: z.string(),
+})
+
+export type Tier2GeminiOutput = z.infer<typeof Tier2Schema> // static type of a value parsed by Tier2Schema
diff --git a/lib/scout/tier2.ts b/lib/scout/tier2.ts
new file mode 100644
index 0000000..07107eb
--- /dev/null
+++ b/lib/scout/tier2.ts
@@ -0,0 +1,105 @@
+import { generateObject } from "ai"
+import { google } from "@ai-sdk/google"
+import { Tier2Schema } from "./tier2-schema"
+import { sanitiseForLLMCompact } from "@/lib/sanitise"
+import type { Tier1Result, Tier2Result } from "./types"
+
+/**
+ * Tier 2 quick qualification: one structured Gemini call per URL, fed the Tier 1 HTML, headers
+ * and scan signals. Blank HTML or a failed call produces a degraded result instead of throwing.
+ */
+export async function qualifyTier2(tier1: Tier1Result): Promise<Tier2Result> {
+  if (!tier1.raw_html?.trim()) {
+    return degradedResult(tier1, "No HTML content available for analysis")
+  }
+
+  const html = sanitiseForLLMCompact(tier1.raw_html, 30_000)
+
+  try {
+    const prompt = `You are a Vercel Sales Engineer qualifying a prospect from their website HTML.
+This is a quick assessment, not a deep analysis. Be accurate and concise.
+
+CRITICAL RULES:
+- State confidence levels honestly. If you're guessing, say confidence: low.
+- Commerce detection must be specific. BigCommerce and Shopify have different
+  implications. BigCommerce headless vs BigCommerce monolithic matters.
+  Look for: bigcommerce.com scripts, shopify CDN URLs, SFCC patterns,
+  commercetools API patterns.
+- The one_line_summary must be one sentence. No semicolons to sneak in two.
+- The executive_paragraph must be one paragraph. No bullet points.
+- deal_score must reflect VERCEL FIT specifically, not general site quality.
+  A beautiful site on Vercel already scores low (no deal). An ugly site
+  with self-hosted Next.js and 1200ms TTFB scores high (easy migration, clear value).
+
+DOMAIN: ${tier1.domain}
+
+HTTP RESPONSE HEADERS:
+${JSON.stringify(tier1.raw_headers, null, 2)}
+
+RESPONSE TIME: ${tier1.response_time_ms}ms
+
+HTML SOURCE (sanitised, first 30000 chars):
+${html}
+
+HEADER SCAN SIGNALS:
+- Next.js detected: ${tier1.is_nextjs}
+- React detected: ${tier1.is_react}
+- Framework signal: ${tier1.js_framework_signal ?? "none"}
+- CDN: ${tier1.cdn_signal ?? "none"}
+- Server: ${tier1.server_header ?? "none"}
+- Response time: ${tier1.response_time_ms}ms`
+    const { object } = await generateObject({
+      model: google("gemini-2.5-flash"),
+      schema: Tier2Schema,
+      prompt,
+    })
+
+    return {
+      url: tier1.url,
+      domain: tier1.domain,
+      framework: object.framework,
+      framework_confidence: object.framework_confidence,
+      framework_evidence: object.framework_evidence,
+      hosting: object.hosting,
+      hosting_confidence: object.hosting_confidence,
+      cdn: object.cdn,
+      commerce_platform: object.commerce_platform,
+      cms: object.cms,
+      composable_maturity: object.composable_maturity,
+      industry_vertical: object.industry_vertical,
+      estimated_size: object.estimated_size,
+      deal_score: object.deal_score,
+      one_line_summary: object.one_line_summary,
+      executive_paragraph: object.executive_paragraph,
+      promote_to_tier3: object.promote_to_tier3,
+      rationale: object.promotion_rationale,
+    }
+  } catch (error) {
+    const failure = error instanceof Error ? error.message : String(error)
+    console.error(`qualifyTier2: Gemini failed for ${tier1.domain}`, error)
+    return degradedResult(tier1, `Gemini qualification failed: ${failure}`)
+  }
+}
+
+function degradedResult(tier1: Tier1Result, reason: string): Tier2Result {
+  return { // fallback Tier2Result when there is no Gemini answer: Tier 1 signals where present, score 0
+    url: tier1.url,
+    domain: tier1.domain,
+    framework: tier1.js_framework_signal ?? "unknown",
+    framework_confidence: "low",
+    framework_evidence: reason,
+    hosting: "unknown",
+    hosting_confidence: "low",
+    cdn: tier1.cdn_signal ?? "unknown",
+    commerce_platform: null,
+    cms: null,
+    composable_maturity: "monolithic", // fixed placeholder, not a detection
+    industry_vertical: "unknown",
+    estimated_size: "unknown",
+    deal_score: 0,
+    one_line_summary: reason, // the same reason text fills every free-text field
+    executive_paragraph: reason,
+    promote_to_tier3: false,
+    rationale: reason,
+  }
+}
diff --git a/lib/scout/types.ts b/lib/scout/types.ts
new file mode 100644
index 0000000..a92e5af
--- /dev/null
+++ b/lib/scout/types.ts
@@ -0,0 +1,75 @@
+// ---------------------------------------------------------------------------
+// Scout — shared types for the territory qualification pipeline
+// ---------------------------------------------------------------------------
+
+export interface Tier1Result { // Tier 1 result for a single URL; presumably produced by a lightweight HTTP probe — confirm against the producer
+  url: string // copied unchanged into Tier2Result.url by the Tier 2 code
+  domain: string // copied into Tier2Result.domain; also used to label Tier 2 error logs
+  reachable: boolean // presumably false when the site could not be fetched — TODO confirm
+  status_code: number | null // NOTE(review): null presumably means no HTTP response was received — confirm
+  response_time_ms: number | null // milliseconds; the Tier 2 prompt interpolates it as `...ms` with no null fallback
+  is_vercel: boolean // NOTE(review): presumably "already hosted on Vercel"; compare ScoutResult.summary.skipped_vercel — confirm
+  is_nextjs: boolean // detection flag; the detection rule is not visible in this file
+  is_react: boolean // detection flag; the detection rule is not visible in this file
+  js_framework_signal: string | null // null when absent; Tier 2 shows "none" in its prompt and "unknown" in degraded results
+  server_header: string | null // null when absent; Tier 2 shows "none" in its prompt
+  cdn_signal: string | null // null when absent; Tier 2 shows "none" in its prompt and "unknown" in degraded results
+  cdn_evidence: string | null // presumably the observation that produced cdn_signal — TODO confirm
+  html_size_bytes: number | null // size in bytes (per the name); nullable
+  verdict: "promote" | "skip" | "maybe" // presumably decides whether the URL advances to Tier 2 — confirm against the pipeline
+  skip_reason: string | null // presumably set only when verdict is "skip" — TODO confirm
+  priority_boost: boolean // NOTE(review): effect on ordering/scoring is not visible in this file
+  confidence: "high" | "medium" | "low" // presumably confidence in the Tier 1 verdict — confirm
+  raw_html: string | null // NOTE(review): travels with every Tier1Result, including ScoutResult.tier1_results and stream events
+  raw_headers: Record<string, string> // header name -> value map; presumably the HTTP response headers — confirm
+}
+
+export interface Tier2Result { // Tier 2 qualification of one site: filled from the qualification call's `object`, or with fallbacks by degradedResult
+  url: string // copied from Tier1Result.url
+  domain: string // copied from Tier1Result.domain
+  framework: string // degraded results use Tier1Result.js_framework_signal, or "unknown" when that is null
+  framework_confidence: "high" | "medium" | "low" // "low" in degraded results
+  framework_evidence: string // free text; holds the failure reason in degraded results
+  hosting: string // "unknown" in degraded results
+  hosting_confidence: "high" | "medium" | "low" // "low" in degraded results
+  cdn: string // degraded results use Tier1Result.cdn_signal, or "unknown" when that is null
+  commerce_platform: string | null // null in degraded results; presumably null also means "none detected" — confirm
+  cms: string | null // null in degraded results; presumably null also means "none detected" — confirm
+  composable_maturity: // no "unknown" member: degraded results fall back to "monolithic"
+    | "monolithic"
+    | "partially-decoupled"
+    | "headless"
+    | "fully-composable"
+  industry_vertical: string // "unknown" in degraded results
+  estimated_size: "startup" | "scaleup" | "mid-market" | "enterprise" | "unknown" // "unknown" in degraded results
+  deal_score: number // 0 in degraded results; NOTE(review): scale/range is not visible in this file
+  one_line_summary: string // holds the failure reason in degraded results
+  executive_paragraph: string // holds the failure reason in degraded results
+  promote_to_tier3: boolean // false in degraded results
+  rationale: string // taken from the model output's `promotion_rationale`; failure reason in degraded results
+}
+
+export interface ScoutResult { // Aggregate outcome of one scan: per-tier results plus summary counters
+  scan_id: string // identifier of the scan; NOTE(review): format/generation is not visible in this file
+  started_at: string // timestamp as a string; presumably ISO 8601 — TODO confirm
+  completed_at: string // timestamp as a string; presumably ISO 8601 — TODO confirm
+  input_count: number // presumably the number of URLs submitted — confirm
+  tier1_results: Tier1Result[] // full Tier 1 records, each including raw_html and raw_headers
+  tier2_results: Tier2Result[] // Tier 2 records; may include degraded (fallback) entries
+  tier3_domains: string[] // presumably domains whose Tier2Result has promote_to_tier3 set — confirm
+  summary: { // counters; NOTE(review): whether the skipped_* buckets are mutually exclusive is not visible here
+    total: number // presumably equals input_count — TODO confirm
+    promoted_to_tier2: number // count of URLs that advanced to Tier 2 (per the name)
+    promoted_to_tier3: number // count of sites promoted to Tier 3 (per the name)
+    skipped_vercel: number // presumably sites skipped because is_vercel was true — confirm
+    skipped_unreachable: number // presumably sites skipped because reachable was false — confirm
+    skipped_no_js: number // presumably sites skipped for lacking a JS framework signal — confirm
+    skipped_other: number // skips not covered by the buckets above (per the name)
+  }
+}
+
+export interface ScoutStreamEvent { // One progress event emitted while a scan runs (per the name, delivered as a stream)
+  stage: "tier1" | "tier2" | "tier3" | "complete" | "error" // which pipeline step the event reports
+  url?: string // optional; presumably omitted for "complete" and scan-level "error" events — confirm
+  data: Tier1Result | Tier2Result | ScoutResult | { message: string } // NOTE(review): not tied to `stage` by the type, so consumers must narrow manually; the "tier3" payload is not evident here
+}