timescale · tjgreen42 · Jan 7, 2026 · Jan 1, 2026 · Jan 1, 2026 · Jan 1, 2026
diff --git a/Makefile b/Makefile
@@ -27,9 +27,10 @@ OBJS = \
 	src/segment/merge.o \
 	src/segment/docmap.o \
 	src/segment/source.o \
+	src/query/bmw.o \
+	src/query/score.o \
 	src/types/vector.o \
 	src/types/query.o \
-	src/types/score.o \
 	src/state/state.o \
 	src/state/registry.o \
 	src/state/metapage.o \
@@ -48,7 +49,7 @@ PG_CPPFLAGS = -I$(srcdir)/src -g -O2 -Wall -Wextra -Wunused-function -Wunused-va
 # PG_CPPFLAGS += -DDEBUG_DUMP_INDEX
 
 # Test configuration
-REGRESS = aerodocs basic deletion vacuum dropped empty implicit index inheritance limits lock manyterms memory merge mixed partitioned queries schema scoring1 scoring2 scoring3 scoring4 scoring5 scoring6 segment strings unsupported updates vector unlogged_index
+REGRESS = aerodocs basic bmw deletion vacuum dropped empty implicit index inheritance limits lock manyterms memory merge mixed partitioned queries schema scoring1 scoring2 scoring3 scoring4 scoring5 scoring6 segment strings unsupported updates vector unlogged_index wand
 REGRESS_OPTS = --inputdir=test --outputdir=test
 
 PG_CONFIG = pg_config

diff --git a/OPTIMIZATION_ROADMAP.md b/OPTIMIZATION_ROADMAP.md
@@ -729,11 +729,56 @@ carrying compatibility code for formats that may never see production use.
 - [ ] Query-time block-aware seek operation
 
 ### v0.3.0: Block-Based Query Executor
-- [ ] Block max score computation at query time
-- [ ] Query executor (WAND or MAXSCORE based on benchmarks)
-- [ ] Single-term optimization path
-- [ ] Threshold-based block skipping
-- [ ] Benchmarks comparing old vs new query path
+- [x] Block max score computation at query time
+- [x] Query executor (initial BMW implementation)
+- [x] Single-term optimization path
+- [x] Threshold-based block skipping
+- [x] Benchmarks comparing old vs new query path
+- [x] GUC variables for BMW enable/disable and stats logging
+- [ ] **Doc-ID ordered traversal** (see note below)
+
+**Note on current BMW limitations**:
+
+The v0.3.0 BMW implementation has two related limitations:
+
+1. **Block-index iteration instead of doc-ID iteration**: The multi-term BMW
+   iterates by block index (0, 1, 2, ...) rather than by doc ID. This assumes
+   blocks across different terms are aligned, which they are not—each term's
+   posting list has its own doc ID ranges. For short queries (1-4 terms), this
+   works because block skipping still helps. For long queries (8+ terms), terms
+   often have non-overlapping doc ID ranges, making block-index iteration
+   ineffective.
+
+2. **Single-block skipping only**: Even for single-term queries, we iterate
+   through blocks sequentially and skip one block at a time:
+   ```c
+   for (block_idx = 0; block_idx < block_count; block_idx++) {
+       if (block_max_scores[block_idx] < threshold)
+           continue;  // Skip THIS block, check next
+       // ... score block
+   }
+   ```
+   We never use binary search on `last_doc_id` to jump over multiple blocks.
+   The skip entry infrastructure supports O(log n) seeking, but we only use it
+   for O(n) sequential iteration with single-block skips.
+
+**Where multi-block seeking matters**:
+- **WAND pivot advancement**: When advancing cursors to a pivot doc_id, binary
+  search could skip hundreds of blocks instead of checking each one
+- **Sparse term intersection**: Terms with non-overlapping ranges waste time
+  scanning blocks that can't possibly match
+- **Long posting lists**: A term with 10,000 blocks does 10,000 comparisons
+  instead of ~13 (log2) to find a target doc_id
+
+**The fix** requires WAND-style cursor-based traversal:
+1. Track each term's current doc ID position (not block index)
+2. Find minimum doc ID across all cursors (the "pivot")
+3. Binary search `last_doc_id` in skip entries to seek directly to target blocks
+4. Only load/score blocks that could contain documents at the pivot
+
+This is the standard BMW algorithm described in Phase 2 above; the current
+implementation is a simplified approximation that works well for common
+short-query workloads but degrades for long queries.
 
 ### v0.4.0: Compression
 - [ ] Delta encoding for doc IDs

diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ Modern ranked text search for Postgres.
 🚀 **Status**: v0.3.0-dev (prerelease) - Feature-complete but not yet optimized.
 Not yet recommended for production use. See [ROADMAP.md](ROADMAP.md) for what's next.
 
-![Tapir and Friends](images/tapir_and_friends_nye_2026.png)
+![Tapir and Friends](images/tapir_and_friends_v0.3.0-dev.png)
 
 ## Historical note
 

diff --git a/ROADMAP.md b/ROADMAP.md
@@ -10,36 +10,29 @@
 | v0.0.4 | Nov 2025 | BM25 score validation, PostgreSQL 18 support |
 | v0.0.5 | Dec 2025 | Segment infrastructure, auto-spill, hierarchical merging |
 | v0.1.0 | Dec 2025 | First open-source release, implicit index resolution, partitioned tables |
+| v0.2.0 | Dec 2025 | V2 segment format, skip index, doc ID mapping, benchmark suite |
 
-## v0.2.0 - Block Storage Foundation
+## Upcoming
 
-Block storage foundation for query optimizations.
-
-- **V2 segment format**: Block-based posting storage (128 docs/block)
-- **Skip index**: Per-block metadata (last_doc_id, max_tf, max_fieldnorm)
-- **Doc ID mapping**: Compact 4-byte segment-local IDs instead of 6-byte CTIDs
-- **Fieldnorm quantization**: 1-byte encoded document lengths
-- **Index build optimizations**: Binary search and direct mapping for CTID lookups
-- **Unlimited indexes**: dshash registry removes fixed limit on concurrent indexes
-- **Benchmark suite**: MS MARCO and Wikipedia benchmarks with public dashboard
-
-## Future
-
-### v0.3.0 - Query Optimizations
+### v0.3.0 - Query Optimizations (Jan 2026)
 
 Query-time performance improvements building on block storage.
 
-- **Block-Max WAND/MAXSCORE**: Early termination for top-k queries
-- **Threshold-based block skipping**: Skip blocks that can't contribute to top-k
+- **Block-Max WAND (BMW)**: Single-term and multi-term scoring with block skipping
+- **WAND-style doc-ID traversal**: Correct multi-term scoring via doc-ID ordered iteration
+- **Threshold-based block skipping**: Skip blocks where max score < threshold
+- **Block max score precomputation**: Per-term block upper bounds for pruning
+- **GUC variables**: `pg_textsearch.enable_bmw` and `pg_textsearch.log_bmw_stats` for benchmarking and debugging
+- **Benchmark results**: 4.3x faster than exhaustive, competitive with System X
 
-### v0.4.0 - Compression
+### v0.4.0 - Compression (Jan 2026)
 
 Reduce storage footprint via posting list compression.
 
 - **Delta encoding**: Compact doc ID storage
 - **FOR/PFOR**: Frame-of-reference encoding for posting blocks
 
-### v1.0.0 - Production Ready (Target: Feb 2026)
+### v1.0.0 - Production Ready (Feb 2026)
 
 First production-quality release.
 

diff --git a/images/tapir_and_friends.png → images/tapir_and_friends_v0.1.0.png b/images/tapir_and_friends.png → images/tapir_and_friends_v0.1.0.png
diff --git a/images/tapir_and_friends_nye_2026.png → images/tapir_and_friends_v0.2.0.png b/images/tapir_and_friends_nye_2026.png → images/tapir_and_friends_v0.2.0.png
diff --git a/images/tapir_and_friends_v0.3.0-dev.png b/images/tapir_and_friends_v0.3.0-dev.png
diff --git a/src/memtable/scan.c b/src/memtable/scan.c
@@ -13,11 +13,11 @@
 #include <utils/memutils.h>
 
 #include "memtable.h"
+#include "query/score.h"
 #include "scan.h"
 #include "state/limit.h"
 #include "state/metapage.h"
 #include "state/state.h"
-#include "types/score.h"
 #include "types/vector.h"
 
 /*

diff --git a/src/mod.c b/src/mod.c
@@ -42,6 +42,12 @@ extern int tp_default_limit;
 /* Global variable for score logging */
 bool tp_log_scores = false;
 
+/* Global variable for BMW stats logging - declared in query/score.c */
+bool tp_log_bmw_stats = false;
+
+/* Global variable to enable/disable BMW - declared in query/score.c */
+bool tp_enable_bmw = true;
+
 /* Global variable for bulk load spill threshold (0 = disabled) */
 int tp_bulk_load_threshold = TP_DEFAULT_BULK_LOAD_THRESHOLD;
 
@@ -116,6 +122,32 @@ _PG_init(void)
 			NULL,
 			NULL);
 
+	DefineCustomBoolVariable(
+			"pg_textsearch.log_bmw_stats",
+			"Log Block-Max WAND statistics during queries",
+			"When enabled, logs blocks scanned/skipped and documents scored "
+			"for each query. Useful for understanding BMW optimization.",
+			&tp_log_bmw_stats,
+			false,		 /* default off */
+			PGC_USERSET, /* Can be changed per session */
+			0,
+			NULL,
+			NULL,
+			NULL);
+
+	DefineCustomBoolVariable(
+			"pg_textsearch.enable_bmw",
+			"Enable Block-Max WAND query optimization",
+			"When enabled, uses block-level upper bounds to skip "
+			"non-contributing blocks. Disable for benchmark comparison.",
+			&tp_enable_bmw,
+			true,		 /* default on */
+			PGC_USERSET, /* Can be changed per session */
+			0,
+			NULL,
+			NULL,
+			NULL);
+
 	DefineCustomIntVariable(
 			"pg_textsearch.bulk_load_threshold",
 			"Terms per transaction to trigger memtable spill",

diff --git a/src/planner/hooks.c b/src/planner/hooks.c
@@ -40,8 +40,8 @@
 #include <utils/syscache.h>
 
 #include "hooks.h"
+#include "query/score.h"
 #include "types/query.h"
-#include "types/score.h"
 
 /* Previous hooks in chain */
 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;