diff --git a/.gitignore b/.gitignore
index 3094469..b884c82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,94 @@
+# Rust specific
+/target/
+**/target/
+**/*.rs.bk
+Cargo.lock
+*.pdb
+
+# Protocol Buffers
+*.pb.h
+*.pb.cc
+*.pb.go
+*.pb.swift
+*.pb.dart
+*.pb.js
+*.pb.ts
+*.pb.rs
+
+# Generated Rust files
+/src/autocomplete_proto.rs
+/src/autocomplete_proto/*.rs
+
+# C++ specific
+*.o
+*.obj
+*.exe
+*.out
+*.app
+*.dll
+*.so
+*.dylib
+*.a
+*.lib
+*.d
+*.lo
+*.la
+*.lai
+*.Plo
+*.Pla
+*.l
+*.elf
+*.bin
+*.hex
+*.map
+*.lst
+*.sym
+*.lss
+*.eep
+
+# Build directories
+/build/
+**/build/
+/debug_build/
+**/debug_build/
+/CMakeFiles/
+**/CMakeFiles/
+/CMakeCache.txt
+**/CMakeCache.txt
+/CMakeScripts/
+**/CMakeScripts/
+/Testing/
+**/Testing/
+/Makefile
+**/Makefile
+/cmake_install.cmake
+**/cmake_install.cmake
+/install_manifest.txt
+**/install_manifest.txt
+/compile_commands.json
+**/compile_commands.json
+/CTestTestfile.cmake
+**/CTestTestfile.cmake
+/_deps
+**/_deps
+/.cmake
+**/.cmake
+
+# IDE specific
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS specific
 .DS_Store
-build
+Thumbs.db
+
+# Project specific
+*.mapped
+*.mapped.stats
+*.dict
+*.inverted
+*.forward
+*.bin
diff --git a/.gitmodules b/.gitmodules
index 72f21cd..5b9dc7e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,9 @@
 [submodule "external/mongoose"]
 	path = external/mongoose
 	url = https://github.com/cesanta/mongoose.git
+[submodule "external/doctest"]
+	path = external/doctest
+	url = https://github.com/onqtam/doctest.git
+[submodule "external/cmd_line_parser"]
+	path = external/cmd_line_parser
+	url = https://github.com/jermp/cmd_line_parser.git
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..35abc20
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright 2019 Giulio Ermanno Pibiri
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
index b1a0946..624670f 100644
--- a/README.md
+++ b/README.md
@@ -1,177 +1,260 @@
-Autocomplete
-------------
+# Autocomplete System

-Query autocompletion in C++.
+This repository contains an autocomplete system implementation. The original C++ implementation is being ported to Rust and will be containerized for easier deployment and testing.

-##### Table of contents
-1. [Description](#descr)
-2. [Compiling the code](#compiling)
-3. [Input data format](#input)
-4. 
[Benchmarks](#benchmarks) -5. [Live demo](#demo) +## Project Structure -Description ------------ +- `autocomplete-rs/`: The Rust port of the original C++ implementation +- `archive/`: Original C++ implementation and related files + +## Goals + +1. Port the C++ implementation to Rust while maintaining the same functionality +2. Leverage Rust's safety guarantees and modern tooling +3. Containerize the application using Docker for easy deployment and testing + +## Current Status + +The porting process is ongoing. The following components have been ported to Rust: + +- Basic constants and configuration +- Parameters management +- Performance measurement probes + +## Building and Testing -We designed two solutions (`autocomplete.hpp` and `autocomplete2.hpp`). -The second solution avoids storing the forward index of the first solution. +### Original C++ Implementation +```bash +cd archive +make +``` -Both solution build on two steps: (1) a prefix search (`prefix_topk`) and (2) a conjunctive search (`conjunctive_topk`). +### Rust Implementation +```bash +cd autocomplete-rs +cargo build +cargo test +``` -Recall that each completion has an associated integer identifier (henceforth, called docID), assigned in *decreasing* score order. +## License -#### 1. Prefix search +This project is licensed under the MIT License - see the LICENSE file for details. -This step returns the top-k completions that are prefixed by the terms in the query. -For this purposes, we build a dictionary storing all completions seen as (multi-) sets of termIDs. -Solution 1 uses an integer trie data structure (`completion_trie.hpp`); -Solution 2 uses Front Coding (`integer_fc_dictionary.hpp`). -We also materialize the list L of docIDs sorted by the lexicographical order of the completions (`unsorted_list.hpp`). +Autocomplete +------------ + +A Query Auto-Completion system based on the paper *[Efficient and Effective Query Auto-Completion](https://dl.acm.org/doi/10.1145/3397271.3401432)*, by Simon Gog, Giulio Ermanno Pibiri, and Rossano Venturini, +published in ACM SIGIR 2020. + +Please, cite the paper if you use the data structures from this library. + +##### Table of contents +1. [Installation and quick start](#install) +2. [Compiling the code](#compiling) +3. [Input data format](#input) +4. [Running the unit tests](#testing) +5. [Building an index](#building) +6. [Benchmarks](#benchmarks) +7. [Live demo](#demo) + +Installation and quick start +------------------ -During a search, we first map the query terms to their lexicographic IDs by using a string dictionary (implemented as a 2-level index with Front Coding -- `fc_dictionary.hpp`). Then, we search the mapped query, say Q, into the completion trie to obtain the lexicographic range [l,r] of all completions that are children of Q. Then we need to identify the top-k docIDs from L[l,r]. Since the range [l,r] can be very large, we use a RMQ data structure built on L. +Just run -Having retrieved a list of (at most) k docIDs, we then: + bash ./install.sh -1. Solution 1: use a forward index (`forward_index.hpp`) to materialize the identified completions into a string pool (`scored_string_pool.hpp`). -The forward index stores the sorted (multi-) set of the termIDs of each completion, plus also the permutation of such termIDs in order to restore the original completion. The sets are stored in increasing-docID order. -Specifically, we use the forward index to obtain the (permuted) set -of termIDs and the string dictionary to extract the strings. +from the parent directory. 
The script builds the code, prepares the test data in the folder `test_data/trec_05_efficiency_queries` for indexing, and executes the unit tests.
+
+After that, for a minimal running example, just run
+
+    bash ./example.sh
+
+and then access the service [from localhost](http://localhost:8000).
+
+### Or you can use a prebuilt Docker image
+
+The following command pulls a prebuilt Docker image and runs it locally.
+
+    docker pull jermp/autocomplete
+    docker run -p 8000:8000 -d jermp/autocomplete
+
+And then access the service [from localhost](http://localhost:8000).

Compiling the code
------------------

-The code is tested on Linux with `gcc` 7.4.0 and on Mac 10.14 with `clang` 10.0.0.
+The code has been tested on Linux with `gcc` 7.4.0, 8.3.0, and 9.0.0, and on Mac OS 10.14 and 12.4 with `clang` 10.0.0 and 13.0.0.
+
To build the code, [`CMake`](https://cmake.org/) is required.

Clone the repository with

-    $ git clone --recursive https://github.com/jermp/autocomplete.git
+    git clone --recursive https://github.com/jermp/autocomplete.git

If you have cloned the repository without `--recursive`, you will need to
perform the following commands before compiling:

-    $ git submodule init
-    $ git submodule update
+    git submodule init
+    git submodule update

To compile the code for a release environment (see file `CMakeLists.txt` for the used compilation flags), it is sufficient to do the following:

-    $ mkdir build
-    $ cd build
-    $ cmake ..
-    $ make
+    mkdir build
+    cd build
+    cmake ..
+    make

-Hint: Use `make -j4` to compile the library in parallel using, e.g., 4 jobs.
+Hint: Use `make -j` to compile the library in parallel using all
+available threads.

For best performance, we recommend compiling with:

-    $ `cmake .. -DCMAKE_BUILD_TYPE=Release -DUSE_SANITIZERS=Off -DUSE_INTRINSICS=On -DUSE_PDEP=On`
+    cmake .. -DCMAKE_BUILD_TYPE=Release -DUSE_SANITIZERS=Off -DUSE_INTRINSICS=On -DUSE_PDEP=On

For a testing environment, use the following instead:

-    $ mkdir debug_build
-    $ cd debug_build
-    $ cmake .. -DCMAKE_BUILD_TYPE=Debug -DUSE_SANITIZERS=On
-    $ make
-
+    mkdir debug_build
+    cd debug_build
+    cmake .. -DCMAKE_BUILD_TYPE=Debug -DUSE_SANITIZERS=On
+    make
+
Input data format
-----------------

The input file should list all completions in *lexicographical* order.
-For example, see the the file `test_data/trec05_efficiency_queries/trec05_efficiency_queries.completions`.
+For example, see the file `test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions`.

The first column represents the ID of the completion;
the other columns contain the tokens separated by white spaces.
-(The IDs for the file `trec05_efficiency_queries.completions` are
+(The IDs for the file `trec_05_efficiency_queries.completions` are
fake, i.e., they do not take into account any particular assignment.)

-The scripts in the directory `test_data` help in
-preparing the datasets for indexing:
+The script `preprocess.sh` in the directory `test_data` helps
+in preparing the data for indexing.
+Thus, from within the directory `test_data`, it is sufficient
+to do:

-1. The command
-
-       $ extract_dict.py trec05_efficiency_queries/trec05_efficiency_queries.completions
-
-   extract the dictionary
-from a file listing all completions in textual form.
+    bash preprocess.sh

-2. The command
+Therefore, for our example with `trec_05_efficiency_queries`, it would be:

-       $ python map_dataset.py trec05_efficiency_queries/trec05_efficiency_queries.completions
-
-   maps strings to integer ids.
+    bash preprocess.sh trec_05_efficiency_queries/trec_05_efficiency_queries.completions 300

-3. The command
+The second argument in the example, i.e., 300, represents the
+number of completions (per completion size) that are drawn at
+random and could be used to query the indexes.

-       $ python build_stats.py trec05_efficiency_queries/trec05_efficiency_queries.completions.mapped
-
-   calulcates the dataset statistics.
+If you run the script, you will get:

-4. The command
-
-       $ python build_inverted_and_forward.py trec05_efficiency_queries/trec05_efficiency_queries.completions
-
-   builds the inverted and forward files.
-
-If you run the scripts in the reported order, you will get:
-
-- `trec05_efficiency_queries.completions.dict`: lists all the distinct
+- `trec_05_efficiency_queries.completions.dict`: lists all the distinct
tokens in the completions sorted in lexicographical order.

-- `trec05_efficiency_queries.completions.mapped`: lists all completions
+- `trec_05_efficiency_queries.completions.mapped`: lists all completions
whose tokens have been mapped to integer ids as assigned by a
lexicographically-sorted string dictionary (that should be built from the
-tokens listed in `trec05_efficiency_queries.completions.dict`).
+tokens listed in `trec_05_efficiency_queries.completions.dict`).
Each completion terminates with the id `0`.

-- `trec05_efficiency_queries.completions.mapped.stats` contains some
+- `trec_05_efficiency_queries.completions.mapped.stats` contains some
statistics about the datasets, needed to build the data structures more
efficiently.
-- `trec05_efficiency_queries.completions.inverted` is the inverted file.
-- `trec05_efficiency_queries.completions.forward` is the forward file. Note that each list is *not* sorted, thus the lists are the same as the ones contained in `trec05_efficiency_queries.completions.mapped` but sorted in docID order.
+- `trec_05_efficiency_queries.completions.inverted` is the inverted file.
+- `trec_05_efficiency_queries.completions.forward` is the forward file. Note that each list is *not* sorted; the lists are the same as the ones contained in `trec_05_efficiency_queries.completions.mapped`, but arranged in docID order.
+
+Running the unit tests
+-----------
+
+The unit tests are written using [doctest](https://github.com/onqtam/doctest).
+
+After compilation and preparation of the data for indexing (see Section [Input data format](#input)), it is advised
+to run the unit tests with:
+
+    make test
+
+Building an index
+-----------
+
+After compiling the code, run the program `./build` to build an index. You can specify the type of the index and the name of the file
+where the index will be written.
+
+For example, with
+
+    ./build ef_type1 ../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions -o trec05.ef_type1.bin
+
+we can build an index of type `ef_type1` from the test file `../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions`, which will be serialized to the file `trec05.ef_type1.bin`.
+
+Possible types are `ef_type1`, `ef_type2`, `ef_type3` and `ef_type4`.
+
+Note: the type `ef_type4` requires an extra parameter
+to be specified, `c`. Use for example: `-c 0.0001`.

Benchmarks
----------

-Run `benchmark/benchmark_prefix_topk` and `benchmark/benchmark_conjunctive_topk`.
+To run the top-k benchmarks in the `/benchmark` directory,
+we first need some query logs.
+They should have been created already if you have run the
+script `preprocess.sh`; otherwise,
+you can use
+
+    python3 partition_queries_by_length.py trec_05_efficiency_queries/trec_05_efficiency_queries.completions trec_05_efficiency_queries/trec_05_efficiency_queries.completions.queries 300
+
+to partition the input completions by number of query terms
+and retain 300 queries at random.
+Query files are placed in the output directory
+`trec_05_efficiency_queries/trec_05_efficiency_queries.completions.queries`.
+(By default, 7 shards will be created: the ones having [1,6] query terms and
+the one collecting all completions with *at least* 7 query terms).
+
+Then the command
+
+    ./benchmark_topk ef_type1 10 trec05.ef_type1.bin 3 300 0.25 < ../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions.queries/queries.length=3.shuffled
+
+will execute 300 top-10 queries with 3 terms, retaining only 25%
+of each query's last token.
+
+We automated the collection of results with the script `script/collect_results_by_varying_percentage.py`.
+From within the `/build` directory, run
+
+    python3 ../script/collect_results_by_varying_percentage.py ef_type1 topk trec05.ef_type1.bin ../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions 10 300
+
+To benchmark the dictionaries (Front-Coding and trie), just run the following script from within
+the `script` directory:

-See the directory `results` for the results on the AOL and MSN query log.
+    bash benchmark_dictionaries.sh

Live demo
---------

Start the web server with the program `./web_server <port> <index_filename>` and access the demo at
-`localhost:<port>`.
\ No newline at end of file
+`localhost:<port>`.
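+
+For reference, here is a minimal C++ sketch of the query API exercised by the benchmarks and the web server. It is only a sketch: it assumes the `ef_autocomplete_type1` type from `types.hpp`, the `nop_probe` used by `benchmark/effectiveness.cpp`, and an index previously serialized to `trec05.ef_type1.bin`; the query string is arbitrary.
+
+    #include <iostream>
+    #include "types.hpp"
+
+    using namespace autocomplete;
+
+    int main() {
+        ef_autocomplete_type1 index;
+        essentials::load(index, "trec05.ef_type1.bin");  // load a serialized index
+        nop_probe probe;  // probe with no timing instrumentation
+        auto it = index.prefix_topk("south ca", 10, probe);  // top-10 completions
+        for (uint64_t i = 0; i != it.size(); ++i, ++it) {
+            auto completion = *it;
+            std::cout << completion.score << ": "
+                      << std::string(completion.string.begin,
+                                     completion.string.end)
+                      << std::endl;
+        }
+        return 0;
+    }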
diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 082ced9..0000000 --- a/TODO.md +++ /dev/null @@ -1,2 +0,0 @@ - -- Study the effect of compression. diff --git a/archive/.github/workflows/continuous_integration.yml b/archive/.github/workflows/continuous_integration.yml new file mode 100644 index 0000000..bf625be --- /dev/null +++ b/archive/.github/workflows/continuous_integration.yml @@ -0,0 +1,61 @@ +name: Continuous Integration + +on: + [ push,pull_request ] + +jobs: + build: + name: Continuous Integration + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-latest ] + steps: + + - name: Checkout code + uses: actions/checkout@v2 + + - name: Checkout submodules + run: git submodule update --init --recursive + + - name: Check cmake version + run: cmake --version + + - name: Creating build directory + run: cmake -E make_directory ./build + + - name: Precompilation + working-directory: ./build + run: cmake .. -DCMAKE_BUILD_TYPE=Release + + - name: Compilation + working-directory: ./build + run: cmake --build . --config Release + + - name: Setup python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + architecture: 'x64' + + - name: Data preprocessing + working-directory: ./test_data + run: bash preprocess.sh trec_05_efficiency_queries/trec_05_efficiency_queries.completions 300 + + - name: Testing + working-directory: ./build + run: ctest + + - name: Build binary dictionary + working-directory: build + run: chmod +x build && ./build ef_type1 ../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions -o trec_05.ef_type1.bin + + - name: Building docker image + run: docker build -t ${{ secrets.DOCKERHUB_USERNAME }}/autocomplete:latest . + + - name: Dockerhub Authentication + run: docker login --username ${{ secrets.DOCKERHUB_USERNAME }} --password ${{ secrets.DOCKERHUB_ACCESS_TOKEN }} + + - name: Publishing image to Container Registry + if: github.ref == 'refs/heads/master' + run: docker push ${{ secrets.DOCKERHUB_USERNAME }}/autocomplete:latest diff --git a/CMakeLists.txt b/archive/CMakeLists.txt similarity index 79% rename from CMakeLists.txt rename to archive/CMakeLists.txt index 4c90e49..9b3c162 100644 --- a/CMakeLists.txt +++ b/archive/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) project(AUTOCOMPLETE) if(CMAKE_BUILD_TYPE MATCHES Debug) @@ -21,7 +21,7 @@ endif () if(UNIX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") @@ -50,4 +50,11 @@ include_directories(${AUTOCOMPLETE_SOURCE_DIR}/include) add_subdirectory(external) add_subdirectory(src) add_subdirectory(benchmark) -add_subdirectory(test) \ No newline at end of file + +enable_testing() +file(GLOB TEST_SOURCES test/test_*.cpp) +foreach(TEST_SRC ${TEST_SOURCES}) + get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE) # without extension + add_executable(${TEST_SRC_NAME} ${TEST_SRC}) + add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME}) +endforeach(TEST_SRC) diff --git a/archive/Dockerfile b/archive/Dockerfile new file mode 100644 index 0000000..f29c164 --- /dev/null +++ b/archive/Dockerfile @@ -0,0 +1,25 @@ +FROM ubuntu:latest + +EXPOSE 8000 + +RUN groupadd appgroup && useradd appuser -G appgroup + +COPY . /src + +WORKDIR /app + +RUN apt update && apt install -y cmake g++ python3 + +RUN cmake /src && cmake --build . 
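+
+# The compiled binaries (web_server, build) land in /app, the current WORKDIR.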
+ +RUN chmod +x web_server && chmod +x build + +RUN ./build ef_type1 /src/test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions -o trec_05.ef_type1.bin + +RUN apt purge -y cmake g++ python3 + +RUN rm -rf /src + +USER appuser + +CMD ["./web_server", "8000", "trec_05.ef_type1.bin"] diff --git a/benchmark/CMakeLists.txt b/archive/benchmark/CMakeLists.txt similarity index 59% rename from benchmark/CMakeLists.txt rename to archive/benchmark/CMakeLists.txt index cf8359f..8f2c632 100644 --- a/benchmark/CMakeLists.txt +++ b/archive/benchmark/CMakeLists.txt @@ -1,5 +1,7 @@ -add_executable(benchmark_topk benchmark_topk.cpp) +# add_executable(benchmark_topk benchmark_topk.cpp) add_executable(benchmark_prefix_topk benchmark_prefix_topk.cpp) add_executable(benchmark_conjunctive_topk benchmark_conjunctive_topk.cpp) add_executable(benchmark_fc_dictionary benchmark_fc_dictionary.cpp) -add_executable(benchmark_integer_fc_dictionary benchmark_integer_fc_dictionary.cpp) \ No newline at end of file +add_executable(benchmark_integer_fc_dictionary benchmark_integer_fc_dictionary.cpp) +add_executable(benchmark_locate_prefix benchmark_locate_prefix.cpp) +add_executable(effectiveness effectiveness.cpp) \ No newline at end of file diff --git a/archive/benchmark/benchmark_common.hpp b/archive/benchmark/benchmark_common.hpp new file mode 100644 index 0000000..1a96333 --- /dev/null +++ b/archive/benchmark/benchmark_common.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "../external/cmd_line_parser/include/parser.hpp" +#include "probe.hpp" + +namespace autocomplete { + +namespace benchmarking { +static const uint32_t runs = 5; +} + +// void tolower(std::string& str) { +// std::transform(str.begin(), str.end(), str.begin(), +// [](unsigned char c) { return std::tolower(c); }); +// } + +size_t load_queries(std::vector& queries, uint32_t max_num_queries, + float percentage, std::istream& is = std::cin) { + assert(percentage >= 0.0 and percentage <= 1.0); + std::string query; + queries.reserve(max_num_queries); + for (uint32_t i = 0; i != max_num_queries; ++i) { + if (!std::getline(is, query)) break; + assert(query.size() > 0); + size_t size = query.size() - 1; + while (size > 0 and query[size] != ' ') --size; + size_t last_token_size = query.size() - size; + size_t end = size + std::ceil(last_token_size * percentage) + 1 + + 1; // retain at least one char + for (size = query.size(); size > end; --size) query.pop_back(); + // tolower(query); + queries.push_back(query); + } + return queries.size(); +} + +void configure_parser_for_benchmarking(cmd_line_parser::parser& parser) { + parser.add("type", "Index type."); + parser.add("k", "top-k value."); + parser.add("index_filename", "Index filename."); + parser.add("num_terms_per_query", "Number of terms per query."); + parser.add("max_num_queries", "Maximum number of queries to execute."); + parser.add("percentage", + "A float in [0,1] specifying how much we keep of the last token " + "in a query: n x 100 <=> n%, for n in [0,1]."); +} + +#define BENCHMARK(what) \ + template \ + void benchmark(std::string const& index_filename, uint32_t k, \ + uint32_t max_num_queries, float keep, \ + essentials::json_lines& breakdowns) { \ + Index index; \ + essentials::load(index, index_filename.c_str()); \ + \ + std::vector queries; \ + uint32_t num_queries = \ + load_queries(queries, max_num_queries, keep, std::cin); \ + \ + uint64_t reported_strings = 0; \ + auto musec_per_query = [&](double time) { \ + return time / (benchmarking::runs * num_queries); \ + }; \ + \ + 
breakdowns.add("num_queries", std::to_string(num_queries)); \ + \ + timer_probe probe(3); \ + for (uint32_t run = 0; run != benchmarking::runs; ++run) { \ + for (auto const& query : queries) { \ + auto it = index.what##topk(query, k, probe); \ + reported_strings += it.size(); \ + } \ + } \ + std::cout << "#ignore: " << reported_strings << std::endl; \ + \ + breakdowns.add("reported_strings", \ + std::to_string(reported_strings / benchmarking::runs)); \ + breakdowns.add( \ + "parsing_musec_per_query", \ + std::to_string(musec_per_query(probe.get(0).elapsed()))); \ + breakdowns.add( \ + std::string(#what) + "search_musec_per_query", \ + std::to_string(musec_per_query(probe.get(1).elapsed()))); \ + breakdowns.add( \ + "reporting_musec_per_query", \ + std::to_string(musec_per_query(probe.get(2).elapsed()))); \ + breakdowns.add( \ + "total_musec_per_query", \ + std::to_string(musec_per_query(probe.get(0).elapsed()) + \ + musec_per_query(probe.get(1).elapsed()) + \ + musec_per_query(probe.get(2).elapsed()))); \ + } \ + \ + int main(int argc, char** argv) { \ + cmd_line_parser::parser parser(argc, argv); \ + configure_parser_for_benchmarking(parser); \ + if (!parser.parse()) return 1; \ + \ + auto type = parser.get("type"); \ + auto k = parser.get("k"); \ + auto index_filename = parser.get("index_filename"); \ + auto max_num_queries = parser.get("max_num_queries"); \ + auto keep = parser.get("percentage"); \ + \ + essentials::json_lines breakdowns; \ + breakdowns.new_line(); \ + breakdowns.add("num_terms_per_query", \ + parser.get("num_terms_per_query")); \ + breakdowns.add("percentage", std::to_string(keep)); \ + \ + if (type == "ef_type1") { \ + benchmark( \ + index_filename, k, max_num_queries, keep, breakdowns); \ + } else if (type == "ef_type2") { \ + benchmark( \ + index_filename, k, max_num_queries, keep, breakdowns); \ + } else if (type == "ef_type3") { \ + benchmark( \ + index_filename, k, max_num_queries, keep, breakdowns); \ + } else if (type == "ef_type4") { \ + benchmark( \ + index_filename, k, max_num_queries, keep, breakdowns); \ + } else { \ + return 1; \ + } \ + \ + breakdowns.print(); \ + return 0; \ + } + +} // namespace autocomplete \ No newline at end of file diff --git a/archive/benchmark/benchmark_conjunctive_topk.cpp b/archive/benchmark/benchmark_conjunctive_topk.cpp new file mode 100644 index 0000000..df14c84 --- /dev/null +++ b/archive/benchmark/benchmark_conjunctive_topk.cpp @@ -0,0 +1,7 @@ +#include + +#include "types.hpp" +#include "benchmark_common.hpp" + +using namespace autocomplete; +BENCHMARK(conjunctive_) \ No newline at end of file diff --git a/benchmark/benchmark_fc_dictionary.cpp b/archive/benchmark/benchmark_fc_dictionary.cpp similarity index 52% rename from benchmark/benchmark_fc_dictionary.cpp rename to archive/benchmark/benchmark_fc_dictionary.cpp index f566edd..d3e66b5 100644 --- a/benchmark/benchmark_fc_dictionary.cpp +++ b/archive/benchmark/benchmark_fc_dictionary.cpp @@ -8,10 +8,10 @@ using namespace autocomplete; template void perf_test(Dictionary const& dict, std::vector const& queries) { - std::vector decoded(2 * constants::MAX_NUM_CHARS_PER_QUERY); + static std::vector decoded(2 * constants::MAX_NUM_CHARS_PER_QUERY); essentials::timer_type timer; - for (uint32_t i = 0; i != runs; ++i) { + for (uint32_t i = 0; i != benchmarking::runs; ++i) { timer.start(); for (auto const& query : queries) { id_type id = dict.locate(string_to_byte_range(query)); @@ -20,8 +20,8 @@ void perf_test(Dictionary const& dict, timer.stop(); } - std::cout << "locate: " << 
(timer.average() * 1000.0) / queries.size() - << " [ns/string]" << std::endl; + std::cout << "locate: " << timer.average() / queries.size() + << " [musec/string]" << std::endl; std::vector ids; ids.reserve(queries.size()); @@ -32,7 +32,7 @@ void perf_test(Dictionary const& dict, timer.reset(); - for (uint32_t i = 0; i != runs; ++i) { + for (uint32_t i = 0; i != benchmarking::runs; ++i) { timer.start(); for (auto const& id : ids) { uint8_t string_len = dict.extract(id, decoded.data()); @@ -41,8 +41,30 @@ void perf_test(Dictionary const& dict, timer.stop(); } - std::cout << "extract: " << (timer.average() * 1000.0) / ids.size() - << " [ns/string]" << std::endl; + std::cout << "extract: " << timer.average() / ids.size() + << " [musec/string]" << std::endl; + + static std::vector percentages = {0.0, 0.25, 0.50, 0.75, 1.0}; + for (auto p : percentages) { + timer.reset(); + for (uint32_t i = 0; i != benchmarking::runs; ++i) { + timer.start(); + for (auto const& query : queries) { + size_t size = query.size(); + size_t n = size * p; + if (n == 0) n += 1; // at least one char + uint8_t const* addr = + reinterpret_cast(query.data()); + range r = dict.locate_prefix({addr, addr + n}); + essentials::do_not_optimize_away(r.end - r.begin); + } + timer.stop(); + } + + std::cout << "\tlocate_prefix-" << p * 100.0 + << "%: " << timer.average() / queries.size() + << " [musec/string]" << std::endl; + } } #define exe(BUCKET_SIZE) \ @@ -57,30 +79,29 @@ void perf_test(Dictionary const& dict, } int main(int argc, char** argv) { - int mandatory = 2 + 1; - if (argc < mandatory) { - std::cout << argv[0] << " < queries" - << std::endl; - return 1; - } + cmd_line_parser::parser parser(argc, argv); + parser.add("collection_basename", "Collection basename."); + parser.add("max_num_queries", "Maximum number of queries to execute."); + if (!parser.parse()) return 1; parameters params; - params.collection_basename = argv[1]; + params.collection_basename = parser.get("collection_basename"); params.load(); - uint32_t num_queries = std::atoi(argv[2]); + auto max_num_queries = parser.get("max_num_queries"); essentials::logger("loading queries..."); std::vector queries; - queries.reserve(num_queries); + queries.reserve(max_num_queries); std::string query; query.reserve(2 * constants::MAX_NUM_CHARS_PER_QUERY); - for (uint32_t i = 0; i != num_queries; ++i) { + for (uint32_t i = 0; i != max_num_queries; ++i) { if (!std::getline(std::cin, query)) break; queries.push_back(std::move(query)); } - num_queries = queries.size(); - essentials::logger("loaded " + std::to_string(num_queries) + " queries"); + max_num_queries = queries.size(); + essentials::logger("loaded " + std::to_string(max_num_queries) + + " queries"); exe(4) exe(8) exe(16) exe(32) exe(64) exe(128) exe(256) return 0; } \ No newline at end of file diff --git a/benchmark/benchmark_integer_fc_dictionary.cpp b/archive/benchmark/benchmark_integer_fc_dictionary.cpp similarity index 94% rename from benchmark/benchmark_integer_fc_dictionary.cpp rename to archive/benchmark/benchmark_integer_fc_dictionary.cpp index f1e35d9..8cb2b32 100644 --- a/benchmark/benchmark_integer_fc_dictionary.cpp +++ b/archive/benchmark/benchmark_integer_fc_dictionary.cpp @@ -8,10 +8,10 @@ using namespace autocomplete; template void perf_test(Dictionary const& dict, std::vector const& queries) { - completion_type decoded(2 * constants::MAX_NUM_CHARS_PER_QUERY); + static completion_type decoded(2 * constants::MAX_NUM_CHARS_PER_QUERY); essentials::timer_type timer; - for (uint32_t i = 0; i != runs; ++i) { 
+ for (uint32_t i = 0; i != benchmarking::runs; ++i) { timer.start(); for (auto const& id : queries) { uint8_t string_len = dict.extract(id, decoded); diff --git a/archive/benchmark/benchmark_locate_prefix.cpp b/archive/benchmark/benchmark_locate_prefix.cpp new file mode 100644 index 0000000..a9e374a --- /dev/null +++ b/archive/benchmark/benchmark_locate_prefix.cpp @@ -0,0 +1,111 @@ +#include + +#include "types.hpp" +#include "statistics.hpp" +#include "benchmark_common.hpp" + +using namespace autocomplete; + +typedef std::pair query_type; + +template +void benchmark(parameters const& params, std::vector& queries, + uint32_t num_queries, uint32_t num_terms_per_query, float keep) { + essentials::json_lines result; + result.new_line(); + result.add("num_terms_per_query", std::to_string(num_terms_per_query)); + result.add("percentage", std::to_string(keep)); + result.add("num_queries", std::to_string(num_queries)); + + Index index; + { + typename Index::builder builder(params); + builder.build(index); + } + + result.add("MiB", std::to_string(static_cast(index.bytes()) / + essentials::MiB)); + result.add( + "bytes_per_completion", + std::to_string(static_cast(index.bytes()) / index.size())); + + essentials::timer_type timer; + timer.start(); + for (uint32_t run = 0; run != benchmarking::runs; ++run) { + for (auto& query : queries) { + auto r = index.locate_prefix(query.first, query.second); + essentials::do_not_optimize_away(r.end - r.begin); + } + } + timer.stop(); + result.add( + "musec_per_query", + std::to_string(timer.elapsed() / (benchmarking::runs * num_queries))); + result.print(); +} + +int main(int argc, char** argv) { + cmd_line_parser::parser parser(argc, argv); + parser.add("type", "Index type."); + parser.add("collection_basename", "Collection basename."); + parser.add("num_terms_per_query", "Number of terms per query."); + parser.add("max_num_queries", "Maximum number of queries to execute."); + parser.add("percentage", + "A float in [0,1] specifying how much we keep of the last token " + "in a query."); + if (!parser.parse()) return 1; + + parameters params; + params.collection_basename = parser.get("collection_basename"); + params.load(); + + auto type = parser.get("type"); + auto max_num_queries = parser.get("max_num_queries"); + auto num_terms_per_query = parser.get("num_terms_per_query"); + auto keep = parser.get("percentage"); + + fc_dictionary_type dict; + { + fc_dictionary_type::builder builder(params); + builder.build(dict); + } + + std::vector strings; + std::vector queries; + uint32_t num_queries = 0; + + { + num_queries = load_queries(strings, max_num_queries, keep, std::cin); + for (auto const& string : strings) { + completion_type prefix; + byte_range suffix; + parse(dict, string, prefix, suffix, true); + range suffix_lex_range = dict.locate_prefix(suffix); + queries.emplace_back(prefix, suffix_lex_range); + } + } + + if (type == "trie") { + benchmark(params, queries, num_queries, + num_terms_per_query, keep); + } else if (type == "fc") { + // benchmark>(params, queries, num_queries, + // num_terms_per_query, keep); + // benchmark>(params, queries, num_queries, + // num_terms_per_query, keep); + benchmark>(params, queries, num_queries, + num_terms_per_query, keep); + // benchmark>(params, queries, num_queries, + // num_terms_per_query, keep); + // benchmark>(params, queries, num_queries, + // num_terms_per_query, keep); + // benchmark>(params, queries, num_queries, + // num_terms_per_query, keep); + // benchmark>(params, queries, num_queries, + // 
num_terms_per_query, keep); + } else { + return 1; + } + + return 0; +} \ No newline at end of file diff --git a/archive/benchmark/benchmark_prefix_topk.cpp b/archive/benchmark/benchmark_prefix_topk.cpp new file mode 100644 index 0000000..69a0bc1 --- /dev/null +++ b/archive/benchmark/benchmark_prefix_topk.cpp @@ -0,0 +1,7 @@ +#include + +#include "types.hpp" +#include "benchmark_common.hpp" + +using namespace autocomplete; +BENCHMARK(prefix_) \ No newline at end of file diff --git a/archive/benchmark/benchmark_topk.cpp b/archive/benchmark/benchmark_topk.cpp new file mode 100644 index 0000000..98d208c --- /dev/null +++ b/archive/benchmark/benchmark_topk.cpp @@ -0,0 +1,7 @@ +#include + +#include "types.hpp" +#include "benchmark_common.hpp" + +using namespace autocomplete; +BENCHMARK("") \ No newline at end of file diff --git a/archive/benchmark/effectiveness.cpp b/archive/benchmark/effectiveness.cpp new file mode 100644 index 0000000..e9c6590 --- /dev/null +++ b/archive/benchmark/effectiveness.cpp @@ -0,0 +1,137 @@ +#include + +#include "types.hpp" +#include "benchmark_common.hpp" + +using namespace autocomplete; + +template +void benchmark(std::string const& index_filename, uint32_t k, + uint32_t max_num_queries, float keep, + essentials::json_lines& stats, bool verbose) { + Index index1, index2; + essentials::load(index1, index_filename.c_str()); + essentials::load(index2, index_filename.c_str()); + + std::vector queries; + uint32_t num_queries = + load_queries(queries, max_num_queries, keep, std::cin); + uint64_t strings_reported_by_prefix_search = 0; + uint64_t better_scored_strings_reported_by_conjunctive_search = 0; + + stats.add("num_queries", std::to_string(num_queries)); + + std::vector difference; + difference.reserve(k); + nop_probe probe; + + for (auto const& query : queries) { + auto it1 = index1.prefix_topk(query, k, probe); + auto it2 = index2.conjunctive_topk(query, k, probe); + strings_reported_by_prefix_search += it1.size(); + + uint64_t more = 0; + assert(it2.size() >= it1.size()); + + auto const& prefix_search_scores = it1.pool()->const_scores(); + auto const& conjunctive_search_scores = it2.pool()->const_scores(); + assert(std::is_sorted(prefix_search_scores.begin(), + prefix_search_scores.begin() + it1.size())); + assert(std::is_sorted(conjunctive_search_scores.begin(), + conjunctive_search_scores.begin() + it2.size())); + + if (verbose) { + std::cout << "query: '" << query << "'" << std::endl; + { + auto it = it1; + std::cout << "prefix_search results: " << it.size() + << std::endl; + for (uint64_t i = 0; i != it.size(); ++i, ++it) { + auto completion = *it; + std::cout << completion.score << ": " + << std::string(completion.string.begin, + completion.string.end) + << std::endl; + } + } + { + auto it = it2; + std::cout << "conjunctive_search results: " << it.size() + << std::endl; + for (uint64_t i = 0; i != it.size(); ++i, ++it) { + auto completion = *it; + std::cout << completion.score << ": " + << std::string(completion.string.begin, + completion.string.end) + << std::endl; + } + } + } + + difference.clear(); + auto it = std::set_difference( + conjunctive_search_scores.begin(), + conjunctive_search_scores.begin() + it2.size(), + prefix_search_scores.begin(), + prefix_search_scores.begin() + it1.size(), difference.begin()); + more = std::distance(difference.begin(), it); + if (verbose) std::cout << "more: " << more << std::endl; + better_scored_strings_reported_by_conjunctive_search += more; + } + + stats.add("strings_reported_by_prefix_search", + 
std::to_string(strings_reported_by_prefix_search)); + stats.add( + "better_scored_strings_reported_by_conjunctive_search", + std::to_string(better_scored_strings_reported_by_conjunctive_search)); + stats.add( + "better_scored_strings_reported_by_conjunctive_search_in_percentage", + std::to_string(better_scored_strings_reported_by_conjunctive_search * + 100.0 / strings_reported_by_prefix_search)); +} + +int main(int argc, char** argv) { + cmd_line_parser::parser parser(argc, argv); + parser.add("type", "Index type."); + parser.add("k", "top-k value."); + parser.add("index_filename", "Index filename."); + parser.add("num_terms_per_query", "Number of terms per query."); + parser.add("max_num_queries", "Maximum number of queries to execute."); + parser.add("percentage", + "A float in [0,1] specifying how much we keep of the last token " + "in a query: n x 100 <=> n%, for n in [0,1]."); + parser.add("verbose", "Verbose output.", "--verbose"); + if (!parser.parse()) return 1; + + auto type = parser.get("type"); + auto k = parser.get("k"); + auto index_filename = parser.get("index_filename"); + auto max_num_queries = parser.get("max_num_queries"); + auto keep = parser.get("percentage"); + auto verbose = parser.get("verbose"); + + essentials::json_lines stats; + stats.new_line(); + stats.add("num_terms_per_query", + parser.get("num_terms_per_query")); + stats.add("percentage", std::to_string(keep)); + + if (type == "ef_type1") { + benchmark(index_filename, k, max_num_queries, + keep, stats, verbose); + } else if (type == "ef_type2") { + benchmark(index_filename, k, max_num_queries, + keep, stats, verbose); + } else if (type == "ef_type3") { + benchmark(index_filename, k, max_num_queries, + keep, stats, verbose); + } else if (type == "ef_type4") { + benchmark(index_filename, k, max_num_queries, + keep, stats, verbose); + } else { + return 1; + } + + stats.print(); + return 0; +} \ No newline at end of file diff --git a/archive/example.sh b/archive/example.sh new file mode 100644 index 0000000..4ac00bf --- /dev/null +++ b/archive/example.sh @@ -0,0 +1,3 @@ +cd build +./build ef_type1 ../test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions -o trec_05.ef_type1.bin +./web_server 8000 trec_05.ef_type1.bin \ No newline at end of file diff --git a/archive/external/CMakeLists.txt b/archive/external/CMakeLists.txt new file mode 100644 index 0000000..5d0ee92 --- /dev/null +++ b/archive/external/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(essentials/include) + +set(DOCTEST_INCLUDE_DIR ${AUTOCOMPLETE_SOURCE_DIR}/external/doctest) +include_directories(${DOCTEST_INCLUDE_DIR}) \ No newline at end of file diff --git a/archive/include/autocomplete.hpp b/archive/include/autocomplete.hpp new file mode 100644 index 0000000..78e54ad --- /dev/null +++ b/archive/include/autocomplete.hpp @@ -0,0 +1,223 @@ +#pragma once + +#include "util_types.hpp" +#include "autocomplete_common.hpp" +#include "scored_string_pool.hpp" +#include "constants.hpp" + +namespace autocomplete { + +template +struct autocomplete { + typedef scored_string_pool::iterator iterator_type; + + autocomplete() { + m_pool.resize(constants::POOL_SIZE, constants::MAX_K); + } + + autocomplete(parameters const& params) + : autocomplete() { + typename Completions::builder cm_builder(params); + typename Dictionary::builder di_builder(params); + typename InvertedIndex::builder ii_builder(params); + typename ForwardIndex::builder fi_builder(params); + + m_unsorted_docs_list.build(cm_builder.doc_ids()); + 
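+        // RMQ-based structure over the minimal (i.e., smallest) docID of each
+        // posting list (the "first column" of the inverted index), used to
+        // answer single-term queries without probing every forward list.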
m_unsorted_minimal_docs_list.build(ii_builder.minimal_doc_ids()); + + cm_builder.build(m_completions); + di_builder.build(m_dictionary); + ii_builder.build(m_inverted_index); + fi_builder.build(m_forward_index); + } + + template + iterator_type prefix_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = true; + if (!parse(m_dictionary, query, prefix, suffix, must_find_prefix)) { + return m_pool.begin(); + } + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + range r = m_completions.locate_prefix(prefix, suffix_lex_range); + if (r.is_invalid()) return m_pool.begin(); + uint32_t num_completions = + m_unsorted_docs_list.topk(r, k, m_pool.scores()); + probe.stop(1); + + probe.start(2); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + template + iterator_type conjunctive_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = false; + parse(m_dictionary, query, prefix, suffix, must_find_prefix); + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + uint32_t num_completions = 0; + if (prefix.size() == 0) { + suffix_lex_range.end += 1; + num_completions = m_unsorted_minimal_docs_list.topk( + m_inverted_index, suffix_lex_range, k, m_pool.scores()); + } else { + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + num_completions = conjunctive_topk(prefix, suffix_lex_range, k); + } + probe.stop(1); + + probe.start(2); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + // iterator_type topk(std::string const& query, const uint32_t k) { + // assert(k <= constants::MAX_K); + // init(); + // completion_type prefix; + // byte_range suffix; + // uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); + // assert(num_terms > 0); + + // range suffix_lex_range = m_dictionary.locate_prefix(suffix); + // if (suffix_lex_range.is_invalid()) return m_pool.begin(); + + // suffix_lex_range.begin += 1; + // suffix_lex_range.end += 1; + // range r = m_completions.locate_prefix(prefix, suffix_lex_range); + + // uint32_t num_completions = 0; + // if (r.is_valid()) { + // num_completions = m_unsorted_docs_list.topk(r, k, + // m_pool.scores()); + // } + + // if (num_completions < k) { + // if (num_terms == 1) { // special case + // suffix_lex_range.begin -= 1; + // num_completions = m_unsorted_minimal_docs_list.topk( + // suffix_lex_range, k, m_pool.scores(), + // true // must return unique results + // ); + // } else { + // num_completions = conjunctive_topk(prefix, suffix_lex_range, + // k); + // } + // } + + // return extract_strings(num_completions); + // } + + size_t bytes() const { + return m_completions.bytes() + m_unsorted_docs_list.bytes() + + m_unsorted_minimal_docs_list.bytes() + m_dictionary.bytes() + + m_inverted_index.bytes() + m_forward_index.bytes(); + } + + void print_stats() const; + + template + void visit(Visitor& visitor) { + visitor.visit(m_completions); + visitor.visit(m_unsorted_docs_list); + 
visitor.visit(m_unsorted_minimal_docs_list); + visitor.visit(m_dictionary); + visitor.visit(m_inverted_index); + visitor.visit(m_forward_index); + } + +private: + Completions m_completions; + unsorted_list_type m_unsorted_docs_list; + typedef minimal_docids minimal_docids_type; + minimal_docids_type m_unsorted_minimal_docs_list; + Dictionary m_dictionary; + InvertedIndex m_inverted_index; + ForwardIndex m_forward_index; + + scored_string_pool m_pool; + + void init() { + m_pool.clear(); + m_pool.init(); + assert(m_pool.size() == 0); + } + + uint32_t conjunctive_topk(completion_type& prefix, const range suffix, + uint32_t const k) { + deduplicate(prefix); + if (prefix.size() == 1) { // we've got nothing to intersect + auto it = m_inverted_index.iterator(prefix.front() - 1); + return conjunctive_topk(it, suffix, k); + } + auto it = m_inverted_index.intersection_iterator(prefix); + return conjunctive_topk(it, suffix, k); + } + + template + uint32_t conjunctive_topk(Iterator& it, const range r, uint32_t const k) { + auto& topk_scores = m_pool.scores(); + uint32_t results = 0; + for (; it.has_next(); ++it) { + auto doc_id = *it; + if (m_forward_index.intersects(doc_id, r)) { + topk_scores[results++] = doc_id; + if (results == k) break; + } + } + return results; + } + + iterator_type extract_strings(const uint32_t num_completions) { + auto const& topk_scores = m_pool.scores(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto doc_id = topk_scores[i]; + auto it = m_forward_index.iterator(doc_id); + uint64_t offset = m_pool.bytes(); + uint8_t* decoded = m_pool.data() + offset; + for (uint32_t j = 0; j != it.size(); ++j, ++it) { + auto term_id = *it; + uint8_t len = m_dictionary.extract(term_id, decoded); + decoded += len; + offset += len; + if (j != it.size() - 1) { + *decoded++ = ' '; + offset++; + } + } + m_pool.push_back_offset(offset); + } + assert(m_pool.size() == num_completions); + return m_pool.begin(); + } +}; +} // namespace autocomplete \ No newline at end of file diff --git a/archive/include/autocomplete2.hpp b/archive/include/autocomplete2.hpp new file mode 100644 index 0000000..eb3f994 --- /dev/null +++ b/archive/include/autocomplete2.hpp @@ -0,0 +1,256 @@ +#pragma once + +#include "util_types.hpp" +#include "building_util.hpp" +#include "compact_vector.hpp" +#include "autocomplete_common.hpp" +#include "scored_string_pool.hpp" +#include "constants.hpp" + +namespace autocomplete { + +template +struct autocomplete2 { + typedef scored_string_pool::iterator iterator_type; + + autocomplete2() { + m_pool.resize(constants::POOL_SIZE, constants::MAX_K); + m_topk_completion_set.resize(constants::MAX_K, + 2 * constants::MAX_NUM_TERMS_PER_QUERY); + } + + autocomplete2(parameters const& params) + : autocomplete2() { + typename Completions::builder cm_builder(params); + typename Dictionary::builder di_builder(params); + typename InvertedIndex::builder ii_builder(params); + auto const& docid_to_lexid = cm_builder.docid_to_lexid(); + m_docid_to_lexid.build(docid_to_lexid.begin(), docid_to_lexid.size(), + util::ceil_log2(params.num_completions + 1)); + m_unsorted_docs_list.build( + util::invert(docid_to_lexid, params.num_completions)); + m_unsorted_minimal_docs_list.build(ii_builder.minimal_doc_ids()); + cm_builder.build(m_completions); + di_builder.build(m_dictionary); + ii_builder.build(m_inverted_index); + } + + template + iterator_type prefix_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + 
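+        // Parse the query: all terms but the last are mapped to termIDs in
+        // `prefix`; the last token, possibly incomplete, is kept as `suffix`.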
completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = true; + if (!parse(m_dictionary, query, prefix, suffix, must_find_prefix)) { + return m_pool.begin(); + } + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + range r = m_completions.locate_prefix(prefix, suffix_lex_range); + if (r.is_invalid()) return m_pool.begin(); + uint32_t num_completions = + m_unsorted_docs_list.topk(r, k, m_pool.scores()); + probe.stop(1); + + probe.start(2); + extract_completions(num_completions); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + template + iterator_type conjunctive_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = false; + parse(m_dictionary, query, prefix, suffix, must_find_prefix); + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + uint32_t num_completions = 0; + if (prefix.size() == 0) { + suffix_lex_range.end += 1; + num_completions = m_unsorted_minimal_docs_list.topk( + m_inverted_index, suffix_lex_range, k, m_pool.scores()); + extract_completions(num_completions); + } else { + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + num_completions = conjunctive_topk(prefix, suffix_lex_range, k); + } + probe.stop(1); + + probe.start(2); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + // iterator_type topk(std::string const& query, const uint32_t k) { + // assert(k <= constants::MAX_K); + // init(); + // completion_type prefix; + // byte_range suffix; + // uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); + // assert(num_terms > 0); + + // range suffix_lex_range = m_dictionary.locate_prefix(suffix); + // if (suffix_lex_range.is_invalid()) return m_pool.begin(); + + // suffix_lex_range.begin += 1; + // suffix_lex_range.end += 1; + // range r = m_completions.locate_prefix(prefix, suffix_lex_range); + + // uint32_t num_completions = 0; + // if (r.is_valid()) { + // num_completions = m_unsorted_docs_list.topk(r, k, + // m_pool.scores()); + // } + + // if (num_completions < k) { + // if (num_terms == 1) { // special case + // suffix_lex_range.begin -= 1; + // num_completions = m_unsorted_minimal_docs_list.topk( + // suffix_lex_range, k, m_pool.scores(), + // true // must return unique results + // ); + // extract_completions(num_completions); + // } else { + // num_completions = conjunctive_topk(prefix, suffix_lex_range, + // k); + // } + // } else { + // extract_completions(num_completions); + // } + + // return extract_strings(num_completions); + // } + + size_t bytes() const { + return m_completions.bytes() + m_unsorted_docs_list.bytes() + + m_unsorted_minimal_docs_list.bytes() + m_dictionary.bytes() + + m_docid_to_lexid.bytes() + m_inverted_index.bytes(); + } + + void print_stats() const; + + template + void visit(Visitor& visitor) { + visitor.visit(m_completions); + visitor.visit(m_unsorted_docs_list); + visitor.visit(m_unsorted_minimal_docs_list); + visitor.visit(m_dictionary); + visitor.visit(m_inverted_index); + visitor.visit(m_docid_to_lexid); + } + +private: + Completions m_completions; + unsorted_list_type m_unsorted_docs_list; + 
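+    // m_unsorted_docs_list stores the docIDs permuted by the lexicographic
+    // order of the corresponding completions, with RMQ support for top-k.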
typedef minimal_docids minimal_docids_type; + minimal_docids_type m_unsorted_minimal_docs_list; + Dictionary m_dictionary; + InvertedIndex m_inverted_index; + compact_vector m_docid_to_lexid; + + scored_string_pool m_pool; + completion_set m_topk_completion_set; + + void init() { + m_pool.clear(); + m_pool.init(); + assert(m_pool.size() == 0); + } + + void extract_completions(const uint32_t num_completions) { + auto const& topk_scores = m_pool.scores(); + auto& completions = m_topk_completion_set.completions(); + auto& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto doc_id = topk_scores[i]; + auto lex_id = m_docid_to_lexid[doc_id]; + uint8_t size = m_completions.extract(lex_id, completions[i]); + sizes[i] = size; + } + } + + uint32_t conjunctive_topk(completion_type& prefix, const range suffix, + uint32_t const k) { + deduplicate(prefix); + if (prefix.size() == 1) { // we've got nothing to intersect + auto it = m_inverted_index.iterator(prefix.front() - 1); + return conjunctive_topk(it, suffix, k); + } + auto it = m_inverted_index.intersection_iterator(prefix); + return conjunctive_topk(it, suffix, k); + } + + template + uint32_t conjunctive_topk(Iterator& it, const range r, const uint32_t k) { + auto& topk_scores = m_pool.scores(); + auto& completions = m_topk_completion_set.completions(); + auto& sizes = m_topk_completion_set.sizes(); + uint32_t i = 0; + + for (; it.has_next(); ++it) { + auto doc_id = *it; + auto lex_id = m_docid_to_lexid[doc_id]; + uint32_t size = m_completions.extract(lex_id, completions[i]); + for (uint32_t j = 0; j != size; ++j) { + if (r.contains(completions[i][j])) { + topk_scores[i] = doc_id; + sizes[i] = size; + ++i; + if (i == k) return k; + break; + } + } + } + + return i; + } + + iterator_type extract_strings(const uint32_t num_completions) { + auto const& completions = m_topk_completion_set.completions(); + auto const& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto const& c = completions[i]; + uint32_t size = sizes[i]; + uint64_t offset = m_pool.bytes(); + uint8_t* decoded = m_pool.data() + offset; + for (uint32_t j = 0; j != size; ++j) { + auto term_id = c[j]; + uint8_t len = m_dictionary.extract(term_id, decoded); + decoded += len; + offset += len; + if (j != size - 1) { + *decoded++ = ' '; + offset++; + } + } + m_pool.push_back_offset(offset); + } + assert(m_pool.size() == num_completions); + return m_pool.begin(); + } +}; +} // namespace autocomplete \ No newline at end of file diff --git a/archive/include/autocomplete3.hpp b/archive/include/autocomplete3.hpp new file mode 100644 index 0000000..6765ad6 --- /dev/null +++ b/archive/include/autocomplete3.hpp @@ -0,0 +1,264 @@ +#pragma once + +#include "util_types.hpp" +#include "building_util.hpp" +#include "compact_vector.hpp" +#include "autocomplete_common.hpp" +#include "scored_string_pool.hpp" +#include "constants.hpp" + +namespace autocomplete { + +/* +During the conjunctive step, maintain a min-heap of iterators, +one iterator for each termID in the lexicographic range of the +last token of the query. 
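+The heap is ordered by the iterators' current docIDs, so each candidate
+from the intersection can be checked with next_geq and the scan stops
+as soon as k hits are found.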
+*/ + +template +struct autocomplete3 { + typedef scored_string_pool::iterator iterator_type; + typedef min_heap> + min_priority_queue_type; + + autocomplete3() { + m_pool.resize(constants::POOL_SIZE, constants::MAX_K); + m_topk_completion_set.resize(constants::MAX_K, + 2 * constants::MAX_NUM_TERMS_PER_QUERY); + } + + autocomplete3(parameters const& params) + : autocomplete3() { + typename Completions::builder cm_builder(params); + typename Dictionary::builder di_builder(params); + typename InvertedIndex::builder ii_builder(params); + auto const& docid_to_lexid = cm_builder.docid_to_lexid(); + m_docid_to_lexid.build(docid_to_lexid.begin(), docid_to_lexid.size(), + util::ceil_log2(params.num_completions + 1)); + m_unsorted_docs_list.build( + util::invert(docid_to_lexid, params.num_completions)); + cm_builder.build(m_completions); + di_builder.build(m_dictionary); + ii_builder.build(m_inverted_index); + } + + template + iterator_type prefix_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = true; + if (!parse(m_dictionary, query, prefix, suffix, must_find_prefix)) { + return m_pool.begin(); + } + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + range r = m_completions.locate_prefix(prefix, suffix_lex_range); + if (r.is_invalid()) return m_pool.begin(); + uint32_t num_completions = + m_unsorted_docs_list.topk(r, k, m_pool.scores()); + probe.stop(1); + + probe.start(2); + extract_completions(num_completions); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + template + iterator_type conjunctive_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = false; + parse(m_dictionary, query, prefix, suffix, must_find_prefix); + probe.stop(0); + + probe.start(1); + uint32_t num_completions = 0; + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + num_completions = conjunctive_topk(prefix, suffix_lex_range, k); + probe.stop(1); + + probe.start(2); + extract_completions(num_completions); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + // iterator_type topk(std::string const& query, const uint32_t k) { + // assert(k <= constants::MAX_K); + // init(); + // completion_type prefix; + // byte_range suffix; + // uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); + // assert(num_terms > 0); + + // range suffix_lex_range = m_dictionary.locate_prefix(suffix); + // if (suffix_lex_range.is_invalid()) return m_pool.begin(); + + // suffix_lex_range.begin += 1; + // suffix_lex_range.end += 1; + // range r = m_completions.locate_prefix(prefix, suffix_lex_range); + + // uint32_t num_completions = 0; + // if (r.is_valid()) { + // num_completions = m_unsorted_docs_list.topk(r, k, + // m_pool.scores()); + // } + + // if (num_completions < k) { + // num_completions = + // conjunctive_topk(num_terms, prefix, suffix_lex_range, k); + // } + + // extract_completions(num_completions); + // return extract_strings(num_completions); + 
// } + + size_t bytes() const { + return m_completions.bytes() + m_unsorted_docs_list.bytes() + + m_dictionary.bytes() + m_docid_to_lexid.bytes() + + m_inverted_index.bytes(); + } + + void print_stats() const; + + template + void visit(Visitor& visitor) { + visitor.visit(m_completions); + visitor.visit(m_unsorted_docs_list); + visitor.visit(m_dictionary); + visitor.visit(m_inverted_index); + visitor.visit(m_docid_to_lexid); + } + +private: + Completions m_completions; + unsorted_list_type m_unsorted_docs_list; + Dictionary m_dictionary; + InvertedIndex m_inverted_index; + compact_vector m_docid_to_lexid; + + scored_string_pool m_pool; + completion_set m_topk_completion_set; + + void init() { + m_pool.clear(); + m_pool.init(); + assert(m_pool.size() == 0); + } + + void extract_completions(const uint32_t num_completions) { + auto const& topk_scores = m_pool.scores(); + auto& completions = m_topk_completion_set.completions(); + auto& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto doc_id = topk_scores[i]; + auto lex_id = m_docid_to_lexid[doc_id]; + uint8_t size = m_completions.extract(lex_id, completions[i]); + sizes[i] = size; + } + } + + uint32_t conjunctive_topk(completion_type& prefix, + const range suffix_lex_range, const uint32_t k) { + if (prefix.size() == 0) { // we've got nothing to intersect + return heap_topk(m_inverted_index, suffix_lex_range, k, + m_pool.scores()); + } + deduplicate(prefix); + if (prefix.size() == 1) { // we've got nothing to intersect + auto it = m_inverted_index.iterator(prefix.front() - 1); + return conjunctive_topk(it, suffix_lex_range, k); + } + auto it = m_inverted_index.intersection_iterator(prefix); + return conjunctive_topk(it, suffix_lex_range, k); + } + + template + uint32_t conjunctive_topk(Iterator& it, const range r, const uint32_t k) { + assert(r.is_valid()); + + auto& topk_scores = m_pool.scores(); + min_priority_queue_type q; + q.reserve(r.end - r.begin + 1); // inclusive range + assert(r.begin > 0); + for (uint64_t term_id = r.begin; term_id <= r.end; ++term_id) { + q.push_back(m_inverted_index.iterator(term_id - 1)); + } + q.make_heap(); + + uint32_t results = 0; + for (; it.has_next() and !q.empty(); ++it) { + auto doc_id = *it; + while (!q.empty()) { + auto& z = q.top(); + auto val = *z; + if (val > doc_id) break; + if (val < doc_id) { + val = z.next_geq(doc_id); + if (!z.has_next()) { + q.pop(); + } else { + q.heapify(); + } + } + if (val == doc_id) { // NOTE: putting else here seems to slow + // down the code! 
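+                    // At this point the candidate produced by the
+                    // intersection also appears in at least one posting
+                    // list of the suffix range, so it is a valid
+                    // completion: record it and move to the next candidate.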
+ topk_scores[results++] = doc_id; + if (results == k) return results; + break; + } + } + } + + return results; + } + + iterator_type extract_strings(const uint32_t num_completions) { + auto const& completions = m_topk_completion_set.completions(); + auto const& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto const& c = completions[i]; + uint32_t size = sizes[i]; + uint64_t offset = m_pool.bytes(); + uint8_t* decoded = m_pool.data() + offset; + for (uint32_t j = 0; j != size; ++j) { + auto term_id = c[j]; + uint8_t len = m_dictionary.extract(term_id, decoded); + decoded += len; + offset += len; + if (j != size - 1) { + *decoded++ = ' '; + offset++; + } + } + m_pool.push_back_offset(offset); + } + assert(m_pool.size() == num_completions); + return m_pool.begin(); + } +}; +} // namespace autocomplete \ No newline at end of file diff --git a/archive/include/autocomplete4.hpp b/archive/include/autocomplete4.hpp new file mode 100644 index 0000000..7d84bae --- /dev/null +++ b/archive/include/autocomplete4.hpp @@ -0,0 +1,290 @@ +#pragma once + +#include "util_types.hpp" +#include "building_util.hpp" +#include "compact_vector.hpp" +#include "autocomplete_common.hpp" +#include "scored_string_pool.hpp" +#include "constants.hpp" + +namespace autocomplete { + +/* Bast and Weber approach. */ + +template +struct autocomplete4 { + typedef scored_string_pool::iterator iterator_type; + + autocomplete4() { + m_pool.resize(constants::POOL_SIZE, constants::MAX_K); + m_topk_completion_set.resize(constants::MAX_K, + 2 * constants::MAX_NUM_TERMS_PER_QUERY); + } + + autocomplete4(parameters const& params, float c) + : autocomplete4() { + typename Completions::builder cm_builder(params); + typename Dictionary::builder di_builder(params); + typename BlockedInvertedIndex::builder ii_builder(params, c); + auto const& docid_to_lexid = cm_builder.docid_to_lexid(); + m_docid_to_lexid.build(docid_to_lexid.begin(), docid_to_lexid.size(), + util::ceil_log2(params.num_completions + 1)); + m_unsorted_docs_list.build( + util::invert(docid_to_lexid, params.num_completions)); + cm_builder.build(m_completions); + di_builder.build(m_dictionary); + ii_builder.build(m_inverted_index); + } + + template + iterator_type prefix_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = true; + if (!parse(m_dictionary, query, prefix, suffix, must_find_prefix)) { + return m_pool.begin(); + } + probe.stop(0); + + probe.start(1); + range suffix_lex_range = m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + range r = m_completions.locate_prefix(prefix, suffix_lex_range); + if (r.is_invalid()) return m_pool.begin(); + uint32_t num_completions = + m_unsorted_docs_list.topk(r, k, m_pool.scores()); + probe.stop(1); + + probe.start(2); + extract_completions(num_completions); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + template + iterator_type conjunctive_topk(std::string const& query, const uint32_t k, + Probe& probe) { + assert(k <= constants::MAX_K); + + probe.start(0); + init(); + completion_type prefix; + byte_range suffix; + constexpr bool must_find_prefix = false; + parse(m_dictionary, query, prefix, suffix, must_find_prefix); + probe.stop(0); + + probe.start(1); + range suffix_lex_range = 
m_dictionary.locate_prefix(suffix); + if (suffix_lex_range.is_invalid()) return m_pool.begin(); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + uint32_t num_completions = + conjunctive_topk(prefix, suffix_lex_range, k); + probe.stop(1); + + probe.start(2); + extract_completions(num_completions); + auto it = extract_strings(num_completions); + probe.stop(2); + + return it; + } + + // iterator_type topk(std::string const& query, const uint32_t k) { + // assert(k <= constants::MAX_K); + // init(); + // completion_type prefix; + // byte_range suffix; + // parse(m_dictionary, query, prefix, suffix); + + // range suffix_lex_range = m_dictionary.locate_prefix(suffix); + // if (suffix_lex_range.is_invalid()) return m_pool.begin(); + + // suffix_lex_range.begin += 1; + // suffix_lex_range.end += 1; + // range r = m_completions.locate_prefix(prefix, suffix_lex_range); + + // uint32_t num_completions = 0; + // if (r.is_valid()) { + // num_completions = m_unsorted_docs_list.topk(r, k, + // m_pool.scores()); + // } + + // if (num_completions < k) { + // num_completions = conjunctive_topk(prefix, suffix_lex_range, k); + // } + + // extract_completions(num_completions); + // return extract_strings(num_completions); + // } + + size_t bytes() const { + return m_completions.bytes() + m_unsorted_docs_list.bytes() + + m_dictionary.bytes() + m_docid_to_lexid.bytes() + + m_inverted_index.bytes(); + } + + void print_stats() const; + + template + void visit(Visitor& visitor) { + visitor.visit(m_completions); + visitor.visit(m_unsorted_docs_list); + visitor.visit(m_dictionary); + visitor.visit(m_inverted_index); + visitor.visit(m_docid_to_lexid); + } + +private: + Completions m_completions; + unsorted_list_type m_unsorted_docs_list; + Dictionary m_dictionary; + BlockedInvertedIndex m_inverted_index; + compact_vector m_docid_to_lexid; + + scored_string_pool m_pool; + completion_set m_topk_completion_set; + + void init() { + m_pool.clear(); + m_pool.init(); + assert(m_pool.size() == 0); + } + + void extract_completions(const uint32_t num_completions) { + auto const& topk_scores = m_pool.scores(); + auto& completions = m_topk_completion_set.completions(); + auto& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto doc_id = topk_scores[i]; + auto lex_id = m_docid_to_lexid[doc_id]; + uint8_t size = m_completions.extract(lex_id, completions[i]); + sizes[i] = size; + } + } + + typedef typename BlockedInvertedIndex::block_type block_t; + + struct block_type_comparator { + bool operator()(block_t& l, block_t& r) { + return l.docs_iterator.operator*() > r.docs_iterator.operator*(); + } + }; + + uint32_t conjunctive_topk(completion_type& prefix, const range suffix, + const uint32_t k) { + auto& topk_scores = m_pool.scores(); + + typedef min_heap + min_priority_queue_type; + min_priority_queue_type q; + uint32_t current_block_id = m_inverted_index.block_id(suffix.begin); + uint32_t current_block_boundary = + m_inverted_index.block_boundary(current_block_id); + for (uint32_t i = suffix.begin; i != suffix.end; ++i) { + assert(i > 0); + if (i > current_block_boundary) { + q.push_back(m_inverted_index.block(current_block_id)); + current_block_id += 1; + current_block_boundary = + m_inverted_index.block_boundary(current_block_id); + } + } + q.push_back(m_inverted_index.block(current_block_id)); + q.make_heap(); + + uint32_t results = 0; + + auto check = [&](block_t& block, id_type doc_id) { + uint64_t pos = block.docs_iterator.position(); + 
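+            // The docs and offsets sequences of a block are aligned:
+            // position pos of the current doc selects, via [begin, end),
+            // the slice of the terms sequence holding that doc's term ids
+            // (shifted by the block's lower_bound). The doc is reported as
+            // soon as one of its terms falls in the suffix's
+            // lexicographic range.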
assert(block.docs_iterator.access(pos) == doc_id); + uint64_t begin = block.offsets_iterator.access(pos); + uint64_t end = block.offsets_iterator.access(pos + 1); + assert(end > begin); + for (uint64_t i = begin; i != end; ++i) { + auto t = block.terms_iterator.access(i) + block.lower_bound; + if (t > suffix.end) break; + if (suffix.contains(t)) { + topk_scores[results++] = doc_id; + break; + } + } + }; + + if (prefix.size() == 0) { + while (!q.empty()) { + auto& z = q.top(); + auto doc_id = z.docs_iterator.operator*(); + check(z, doc_id); + if (results == k) return results; + z.docs_iterator.next(); + if (!z.docs_iterator.has_next()) q.pop(); + q.heapify(); + } + } else { + deduplicate(prefix); + auto it = m_inverted_index.intersection_iterator(prefix, suffix); + for (; it.has_next() and !q.empty(); ++it) { + auto doc_id = *it; + while (!q.empty()) { + auto& z = q.top(); + auto val = z.docs_iterator.operator*(); + if (val > doc_id) break; + if (val < doc_id) { + val = z.docs_iterator.next_geq(doc_id); + if (!z.docs_iterator.has_next()) { + q.pop(); + } else { + q.heapify(); + } + } else { + if (val == doc_id) { + check(z, doc_id); + if (results == k) return results; + } + break; + } + } + } + } + + return results; + } + + iterator_type extract_strings(const uint32_t num_completions) { + auto const& completions = m_topk_completion_set.completions(); + auto const& sizes = m_topk_completion_set.sizes(); + for (uint32_t i = 0; i != num_completions; ++i) { + auto const& c = completions[i]; + uint32_t size = sizes[i]; + uint64_t offset = m_pool.bytes(); + uint8_t* decoded = m_pool.data() + offset; + for (uint32_t j = 0; j != size; ++j) { + auto term_id = c[j]; + uint8_t len = m_dictionary.extract(term_id, decoded); + decoded += len; + offset += len; + if (j != size - 1) { + *decoded++ = ' '; + offset++; + } + } + m_pool.push_back_offset(offset); + } + assert(m_pool.size() == num_completions); + return m_pool.begin(); + } +}; + +} // namespace autocomplete \ No newline at end of file diff --git a/archive/include/autocomplete_common.hpp b/archive/include/autocomplete_common.hpp new file mode 100644 index 0000000..21d952b --- /dev/null +++ b/archive/include/autocomplete_common.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include "util_types.hpp" +#include "min_heap.hpp" +#include "unsorted_list.hpp" +#include "minimal_docids.hpp" +#include "succinct_rmq/cartesian_tree.hpp" + +namespace autocomplete { + +typedef unsorted_list unsorted_list_type; + +template +bool parse(Dictionary const& dict, std::string const& query, + completion_type& prefix, byte_range& suffix, bool must_find_prefix) { + byte_range_iterator it(string_to_byte_range(query)); + while (true) { + suffix = it.next(); + if (!it.has_next()) break; + auto term_id = dict.locate(suffix); + if (term_id != global::invalid_term_id) { + prefix.push_back(term_id); + } else { + if (must_find_prefix) return false; + } + } + return true; +} + +void deduplicate(completion_type& c) { + std::sort(c.begin(), c.end()); + auto end = std::unique(c.begin(), c.end()); + c.resize(std::distance(c.begin(), end)); +} + +template +uint32_t heap_topk(InvertedIndex const& index, const range r, const uint32_t k, + std::vector& topk_scores) { + assert(r.is_valid()); + + typedef min_heap> + min_priority_queue_type; + + min_priority_queue_type q; + q.reserve(r.end - r.begin + 1); // inclusive range + assert(r.begin > 0); + for (uint64_t term_id = r.begin; term_id <= r.end; ++term_id) { + q.push_back(index.iterator(term_id - 1)); + } + q.make_heap(); + + uint32_t results = 
0;
+
+    while (!q.empty()) {
+        auto& z = q.top();
+        auto doc_id = *z;
+        bool already_present = std::binary_search(
+            topk_scores.begin(), topk_scores.begin() + results, doc_id);
+        if (!already_present) {
+            topk_scores[results++] = doc_id;
+            if (results == k) return results;
+        }
+        z.next();
+        if (!z.has_next()) q.pop();
+        q.heapify();
+    }
+
+    return results;
+}
+
+}  // namespace autocomplete
\ No newline at end of file
diff --git a/include/bit_vector.hpp b/archive/include/bit_vector.hpp
similarity index 98%
rename from include/bit_vector.hpp
rename to archive/include/bit_vector.hpp
index 676c112..4afb7dd 100644
--- a/include/bit_vector.hpp
+++ b/archive/include/bit_vector.hpp
@@ -242,12 +242,6 @@ struct bit_vector {
         build(in);
     }

-    bit_vector& operator=(bit_vector const& other) {
-        bit_vector tmp(other);
-        tmp.swap(*this);
-        return *this;
-    }
-
     void swap(bit_vector& other) {
         std::swap(other.m_size, m_size);
         other.m_bits.swap(m_bits);
@@ -412,6 +406,7 @@ struct bits_getter {
         , m_base(offset)
         , m_width(width)
         , m_mask(-(width == 64) | ((uint64_t(1) << width) - 1)) {
+        assert(width > 0);
         util::prefetch(m_data + m_base / 64);
     }

diff --git a/include/blocked_inverted_index.hpp b/archive/include/blocked_inverted_index.hpp
similarity index 73%
rename from include/blocked_inverted_index.hpp
rename to archive/include/blocked_inverted_index.hpp
index 79319fe..2f1af3a 100644
--- a/include/blocked_inverted_index.hpp
+++ b/archive/include/blocked_inverted_index.hpp
@@ -21,9 +21,12 @@ struct blocked_inverted_index {
         builder(parameters const& params, float c)
             : m_num_integers(0)
-            , m_num_docs(params.num_completions)
+            , m_num_docs(params.universe)
             , m_num_terms(params.num_terms) {
-            assert(c > 0.0);
+            if (!(c > 0.0 and c <= 1.0)) {
+                throw std::runtime_error("c must be in (0,1]");
+            }
+
             essentials::logger("building blocked_inverted_index with c = " +
                                std::to_string(c) + "...");
@@ -115,6 +118,7 @@ struct blocked_inverted_index {
             auto max = *std::max_element(term_list.begin(), term_list.end());
             uint64_t width = util::ceil_log2(max + 1);
+            if (width == 0) width = 1;
             // std::cout << "using " << width << " [bpi]" << std::endl;
             m_terms.append_bits(width, 6);
             for (auto t : term_list) m_terms.append_bits(t, width);
@@ -248,6 +252,11 @@ struct blocked_inverted_index {
         return id;
     }

+    uint32_t block_boundary(uint32_t block_id) const {
+        assert(block_id < m_blocks.size());
+        return m_blocks[block_id];
+    }
+
     struct block_type {
         docs_iterator_type docs_iterator;
         offsets_iterator_type offsets_iterator;
@@ -263,61 +272,45 @@ struct blocked_inverted_index {
             : m_i(0)
             , m_num_docs(ii->num_docs())
             , m_suffix(r) {
-            assert(!r.is_invalid());
-
-            if (!term_ids.empty()) {
-                m_iterators.reserve(term_ids.size());  // at most
-                std::sort(term_ids.begin(), term_ids.end());
-                uint32_t current_block_id = ii->block_id(term_ids.front());
-                uint32_t i = 0;
-                uint32_t prev_i = 0;
-                for (; i != term_ids.size(); ++i) {
-                    auto term_id = term_ids[i];
-                    assert(term_id > 0);
-                    uint32_t b = ii->block_id(term_id);
-                    if (b > current_block_id) {
-                        auto block = ii->block(current_block_id);
-                        block.term_ids.reserve(term_ids.size());  // at most
-                        for (; prev_i != i; ++prev_i) {
-                            block.term_ids.push_back(term_ids[prev_i]);
-                        }
-                        m_iterators.push_back(std::move(block));
+            assert(r.is_valid());
+            assert(!term_ids.empty());
+            assert(std::is_sorted(term_ids.begin(), term_ids.end()));
+            assert(std::unique(term_ids.begin(), term_ids.end()) ==
+                   term_ids.end());
+
+            m_blocks.reserve(term_ids.size());  // at most
+            uint32_t current_block_id =
ii->block_id(term_ids.front()); + uint32_t i = 0; + uint32_t prev_i = 0; + for (; i != term_ids.size(); ++i) { + auto term_id = term_ids[i]; + assert(term_id > 0); + uint32_t b = ii->block_id(term_id); + if (b > current_block_id) { + auto block = ii->block(current_block_id); + block.term_ids.reserve(term_ids.size()); // at most + for (; prev_i != i; ++prev_i) { + block.term_ids.push_back(term_ids[prev_i]); } - current_block_id = b; + m_blocks.push_back(std::move(block)); } - - auto block = ii->block(current_block_id); - block.term_ids.reserve(term_ids.size()); // at most - for (; prev_i != i; ++prev_i) { - block.term_ids.push_back(term_ids[prev_i]); - } - m_iterators.push_back(std::move(block)); - - assert(m_iterators.size() > 0); - std::sort(m_iterators.begin(), m_iterators.end(), - [](auto const& l, auto const& r) { - return l.docs_iterator.size() < - r.docs_iterator.size(); - }); - - m_candidate = m_iterators[0].docs_iterator.access(0); - } else { - m_candidate = 0; + current_block_id = b; } - { - uint32_t current_block_id = ii->block_id(r.begin); - uint32_t i = r.begin; - for (; i != r.end; ++i) { - assert(i > 0); - uint32_t b = ii->block_id(i); - if (b > current_block_id) { - m_range.push_back(ii->block(current_block_id)); - } - current_block_id = b; - } - m_range.push_back(ii->block(current_block_id)); + auto block = ii->block(current_block_id); + block.term_ids.reserve(term_ids.size()); // at most + for (; prev_i != i; ++prev_i) { + block.term_ids.push_back(term_ids[prev_i]); } + m_blocks.push_back(std::move(block)); + + std::sort(m_blocks.begin(), m_blocks.end(), + [](auto const& l, auto const& r) { + return l.docs_iterator.size() < + r.docs_iterator.size(); + }); + + m_candidate = m_blocks[0].docs_iterator.access(0); next(); } @@ -331,62 +324,37 @@ struct blocked_inverted_index { } void operator++() { - assert(m_i == m_iterators.size()); - if (!m_iterators.empty()) { - if (m_iterators.size() > 1) { - m_candidate = m_iterators[0].docs_iterator.next(); - } - } else { - m_candidate += 1; + assert(m_i == m_blocks.size()); + if (m_blocks.size() > 1) { + m_candidate = m_blocks[0].docs_iterator.next(); } m_i = 0; next(); } - bool intersects() { - for (auto& block : m_range) { - uint64_t val = block.docs_iterator.next_geq(m_candidate); - if (val == m_candidate) { - uint64_t pos = block.docs_iterator.position(); - assert(block.docs_iterator.access(pos) == m_candidate); - uint64_t begin = block.offsets_iterator.access(pos); - uint64_t end = block.offsets_iterator.access(pos + 1); - assert(end > begin); - uint32_t lower_bound = block.lower_bound; - for (uint64_t i = begin; i != end; ++i) { - auto t = block.terms_iterator.access(i) + lower_bound; - if (t > m_suffix.end) break; - if (m_suffix.contains(t)) return true; - } - } - } - return false; - } - private: id_type m_candidate; size_t m_i; uint64_t m_num_docs; - std::vector m_iterators; + std::vector m_blocks; std::vector m_range; range m_suffix; bool in() { // is candidate doc in intersection? 
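        // A candidate is accepted by block m_i only if the term list of the
        // current doc contains *all* the query term ids that fall in this
        // block; both lists are sorted, so one forward scan over [begin, end)
        // suffices.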
- uint64_t pos = m_iterators[m_i].docs_iterator.position(); - if (pos == m_iterators[m_i].docs_iterator.size()) return false; - uint64_t begin = m_iterators[m_i].offsets_iterator.access(pos); - uint64_t end = m_iterators[m_i].offsets_iterator.access(pos + 1); + auto& b = m_blocks[m_i]; + uint64_t pos = b.docs_iterator.position(); + if (pos == b.docs_iterator.size()) return false; + uint64_t begin = b.offsets_iterator.access(pos); + uint64_t end = b.offsets_iterator.access(pos + 1); assert(end > begin); - if (end - begin < m_iterators[m_i].term_ids.size()) return false; + if (end - begin < b.term_ids.size()) return false; uint64_t i = begin; - uint32_t lower_bound = m_iterators[m_i].lower_bound; - for (auto x : m_iterators[m_i].term_ids) { + for (auto x : b.term_ids) { bool found = false; for (; i != end; ++i) { - auto t = - m_iterators[m_i].terms_iterator.access(i) + lower_bound; + auto t = b.terms_iterator.access(i) + b.lower_bound; if (t == x) { found = true; break; @@ -399,18 +367,17 @@ struct blocked_inverted_index { } void next() { - if (m_iterators.empty()) return; - if (m_iterators.size() == 1) { - while (m_candidate < m_num_docs and m_i != m_iterators.size()) { + if (m_blocks.size() == 1) { + while (m_candidate < m_num_docs and m_i != m_blocks.size()) { assert(m_i == 0); - m_candidate = m_iterators[m_i].docs_iterator.next(); + m_candidate = m_blocks[m_i].docs_iterator.next(); if (in()) ++m_i; } } else { - while (m_candidate < m_num_docs and m_i != m_iterators.size()) { + while (m_candidate < m_num_docs and m_i != m_blocks.size()) { // NOTE: since we work with unions of posting lists, // next_geq by scan runs faster - auto val = m_iterators[m_i].docs_iterator.next_geq_by_scan( + auto val = m_blocks[m_i].docs_iterator.next_geq_by_scan( m_candidate); bool is_in = in(); if (val == m_candidate and is_in) { @@ -429,34 +396,6 @@ struct blocked_inverted_index { return intersection_iterator_type(this, term_ids, r); } - template - void visit(Visitor& visitor) { - visitor.visit(m_num_integers); - visitor.visit(m_num_docs); - visitor.visit(m_num_terms); - visitor.visit(m_blocks); - visitor.visit(m_pointers_to_lists); - visitor.visit(m_lists); - visitor.visit(m_pointers_to_offsets); - visitor.visit(m_offsets); - visitor.visit(m_pointers_to_terms); - visitor.visit(m_terms); - } - -private: - uint64_t m_num_integers; - uint64_t m_num_docs; - uint64_t m_num_terms; - - std::vector m_blocks; - - ef::ef_sequence m_pointers_to_lists; - bit_vector m_lists; - ef::ef_sequence m_pointers_to_offsets; - bit_vector m_offsets; - ef::ef_sequence m_pointers_to_terms; - bit_vector m_terms; - block_type block(uint32_t block_id) const { assert(block_id < num_blocks()); block_type b; @@ -485,6 +424,34 @@ struct blocked_inverted_index { return b; } + + template + void visit(Visitor& visitor) { + visitor.visit(m_num_integers); + visitor.visit(m_num_docs); + visitor.visit(m_num_terms); + visitor.visit(m_blocks); + visitor.visit(m_pointers_to_lists); + visitor.visit(m_lists); + visitor.visit(m_pointers_to_offsets); + visitor.visit(m_offsets); + visitor.visit(m_pointers_to_terms); + visitor.visit(m_terms); + } + +private: + uint64_t m_num_integers; + uint64_t m_num_docs; + uint64_t m_num_terms; + + std::vector m_blocks; + + ef::ef_sequence m_pointers_to_lists; + bit_vector m_lists; + ef::ef_sequence m_pointers_to_offsets; + bit_vector m_offsets; + ef::ef_sequence m_pointers_to_terms; + bit_vector m_terms; }; } // namespace autocomplete \ No newline at end of file diff --git a/archive/include/building_util.hpp 
b/archive/include/building_util.hpp new file mode 100644 index 0000000..0398879 --- /dev/null +++ b/archive/include/building_util.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include "util.hpp" +#include "bit_vector.hpp" + +namespace autocomplete { +namespace util { + +std::vector invert(std::vector const& docid_to_lexid, + uint64_t size) { + std::vector lexid_to_docid(size); + for (uint64_t doc_id = 0; doc_id != docid_to_lexid.size(); ++doc_id) { + if (docid_to_lexid[doc_id] < size) { + lexid_to_docid[docid_to_lexid[doc_id]] = doc_id; + } + } + return lexid_to_docid; +} + +void push_pad(bit_vector_builder& bvb, uint64_t alignment = 8) { + uint64_t mod = bvb.size() % alignment; + if (mod) { + uint64_t pad = alignment - mod; + bvb.append_bits(0, pad); + assert(bvb.size() % alignment == 0); + } +} + +void eat_pad(bits_iterator& it, uint64_t alignment = 8) { + uint64_t mod = it.position() % alignment; + if (mod) { + uint64_t pad = alignment - mod; + it.get_bits(pad); + assert(it.position() % alignment == 0); + } +} + +} // namespace util +} // namespace autocomplete \ No newline at end of file diff --git a/include/compact_forward_index.hpp b/archive/include/compact_forward_index.hpp similarity index 92% rename from include/compact_forward_index.hpp rename to archive/include/compact_forward_index.hpp index 74ad769..50267f4 100644 --- a/include/compact_forward_index.hpp +++ b/archive/include/compact_forward_index.hpp @@ -14,25 +14,25 @@ struct compact_forward_index { : m_num_integers(0) , m_num_terms(params.num_terms) { essentials::logger("building forward_index..."); - uint64_t num_completions = params.num_completions; + uint64_t universe = params.universe; std::ifstream input( (params.collection_basename + ".forward").c_str(), std::ios_base::in); - - std::vector terms; - terms.reserve(params.num_completions * + std::vector terms; + terms.reserve(universe * constants::MAX_NUM_TERMS_PER_QUERY); // at most uint64_t size = 0; m_pointers.push_back(0); - for (uint64_t i = 0; i != num_completions; ++i) { + for (uint64_t i = 0; i != universe; ++i) { uint32_t n = 0; input >> n; - assert(n > 0 and n < constants::MAX_NUM_TERMS_PER_QUERY); + assert(n < constants::MAX_NUM_TERMS_PER_QUERY); m_num_integers += n; size += n; for (uint64_t k = 0; k != n; ++k) { id_type x; input >> x; + assert(x > 0); terms.push_back(x); } m_pointers.push_back(size); @@ -90,6 +90,7 @@ struct compact_forward_index { bool intersects(const range r) const { for (uint64_t i = 0; i != size(); ++i) { auto val = m_cv[m_base + i]; + assert(val > 0); if (r.contains(val)) return true; } return false; @@ -103,6 +104,7 @@ struct compact_forward_index { }; forward_list_iterator_type iterator(id_type doc_id) { + assert(doc_id < num_docs()); uint64_t pos = m_pointers.access(doc_id); uint64_t n = m_pointers.access(doc_id + 1) - pos; return {m_data, pos, n}; diff --git a/include/compact_vector.hpp b/archive/include/compact_vector.hpp similarity index 89% rename from include/compact_vector.hpp rename to archive/include/compact_vector.hpp index f0cd1bd..ac8e275 100644 --- a/include/compact_vector.hpp +++ b/archive/include/compact_vector.hpp @@ -73,26 +73,33 @@ struct compact_vector { }; struct builder { - builder(uint64_t n = 0, uint64_t w = 0) + builder() + : m_back(0) + , m_cur_block(0) + , m_cur_shift(0) {} + + builder(uint64_t n, uint64_t w) : m_size(n) - , m_width(!w ? 
w + 1 : w) + , m_width(w) , m_mask(-(w == 64) | ((1ULL << w) - 1)) , m_back(0) , m_cur_block(0) , m_cur_shift(0) , m_bits(essentials::words_for(m_size * m_width), 0) { - if (m_width > 64) { - throw std::runtime_error("width must be <= 64"); + if (m_width == 0 or m_width > 64) { + throw std::runtime_error("width must be > 0 and <= 64"); } } void resize(size_t n, uint64_t w) { m_size = n; - m_width = !w ? w + 1 : w; - if (m_width > 64) { - throw std::runtime_error("width must be <= 64"); + m_width = w; + if (m_width == 0 or m_width > 64) { + throw std::runtime_error("width must be > 0 and <= 64"); } m_mask = -(w == 64) | ((uint64_t(1) << w) - 1); + std::cout << "using " << essentials::words_for(m_size * m_width) + << " words" << std::endl; m_bits.resize(essentials::words_for(m_size * m_width), 0); } @@ -108,7 +115,7 @@ struct compact_vector { throw std::runtime_error("width must be greater than 0"); } - for (uint64_t i = 0; i < n; ++i, ++begin) { + for (uint64_t i = 0; i != n; ++i, ++begin) { push_back(*begin); } } @@ -220,8 +227,13 @@ struct compact_vector { void build(Iterator begin, uint64_t n) { uint64_t max = *std::max_element(begin, begin + n); uint64_t width = util::ceil_log2(max + 1); - std::cout << "\tusing " << width << " [bpi]" << std::endl; - compact_vector::builder builder(begin, n, width); + build(begin, n, width); + } + + template + void build(Iterator begin, uint64_t n, uint64_t w) { + std::cout << "\tusing " << w << " [bpi]" << std::endl; + compact_vector::builder builder(begin, n, w); builder.build(*this); } @@ -277,7 +289,7 @@ struct compact_vector { } uint64_t find(const range r, uint64_t id) { - assert(!r.is_invalid()); + assert(r.is_valid()); assert(r.end <= size()); return util::find(*this, id, r.begin, r.end - 1); } @@ -312,4 +324,5 @@ struct compact_vector { uint64_t m_mask; std::vector m_bits; }; + } // namespace autocomplete diff --git a/include/completion_trie.hpp b/archive/include/completion_trie.hpp similarity index 97% rename from include/completion_trie.hpp rename to archive/include/completion_trie.hpp index 8ae9036..2bc68ea 100644 --- a/include/completion_trie.hpp +++ b/archive/include/completion_trie.hpp @@ -166,16 +166,16 @@ struct completion_trie { completion_trie() {} // If the last token of the query is not completely specified, - // then we search for its lexicographic range among the children of c. + // then we search for its lexicographic range among the children of prefix. 
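+    // That is: we walk one trie level per term of prefix to reach its node,
+    // then restrict that node's child range to the ids in suffix_lex_range.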
// Return [a,b) - range locate_prefix(completion_type const& c, + range locate_prefix(completion_type const& prefix, range suffix_lex_range) const { - range r{global::not_found, global::not_found}; + range r = global::invalid_range; range pointer{0, m_nodes.front().size()}; uint32_t i = 0; - for (; i < c.size(); ++i) { - uint64_t pos = m_nodes[i].find(pointer, c[i]); + for (; i < prefix.size(); ++i) { + uint64_t pos = m_nodes[i].find(pointer, prefix[i]); if (pos == global::not_found) return global::invalid_range; pointer = m_pointers[i][pos]; } @@ -195,10 +195,11 @@ struct completion_trie { r.end += size; } - assert(r.end > r.begin); + assert(r.is_valid()); return r; } + // NOTE: not used bool is_member(completion_type const& c) const { assert(c.size() > 0); range pointer{0, m_nodes.front().size()}; diff --git a/include/constants.hpp b/archive/include/constants.hpp similarity index 100% rename from include/constants.hpp rename to archive/include/constants.hpp diff --git a/include/ef/compact_ef.hpp b/archive/include/ef/compact_ef.hpp similarity index 100% rename from include/ef/compact_ef.hpp rename to archive/include/ef/compact_ef.hpp diff --git a/include/ef/darray.hpp b/archive/include/ef/darray.hpp similarity index 100% rename from include/ef/darray.hpp rename to archive/include/ef/darray.hpp diff --git a/include/ef/ef_parameters.hpp b/archive/include/ef/ef_parameters.hpp similarity index 100% rename from include/ef/ef_parameters.hpp rename to archive/include/ef/ef_parameters.hpp diff --git a/include/ef/ef_sequence.hpp b/archive/include/ef/ef_sequence.hpp similarity index 93% rename from include/ef/ef_sequence.hpp rename to archive/include/ef/ef_sequence.hpp index 10970d6..2e9e293 100644 --- a/include/ef/ef_sequence.hpp +++ b/archive/include/ef/ef_sequence.hpp @@ -49,6 +49,7 @@ struct ef_sequence { ++within; } assert(values.size() == n); + assert(std::is_sorted(values.begin(), values.end())); compress(values.begin(), values.size(), values.back()); } @@ -142,33 +143,25 @@ struct ef_sequence { } uint64_t find(const range r, uint64_t id) const { - assert(!r.is_invalid()); + assert(r.is_valid()); assert(r.end <= size()); uint64_t prev_upper = previous_range_upperbound(r); return util::find(*this, id + prev_upper, r.begin, r.end - 1); } range find(const range r, const range lex) const { - assert(!r.is_invalid()); + assert(r.is_valid()); assert(r.end <= size()); auto prev_upper = previous_range_upperbound(r); - - uint64_t begin = - util::next_geq(*this, lex.begin + prev_upper, r.begin, r.end - 1); - if (begin == global::not_found) { + uint64_t id_begin = lex.begin + prev_upper; + uint64_t id_end = lex.end + prev_upper; + uint64_t begin = util::next_geq(*this, id_begin, r.begin, r.end - 1); + if (begin == global::not_found or access(begin) > id_end) { return {r.end, r.end}; } - - if (lex.begin == lex.end) { - return {begin, begin + 1}; - } - - uint64_t id_end = lex.end + prev_upper; + if (lex.begin == lex.end) return {begin, begin + 1}; uint64_t end = util::next_geq(*this, id_end, begin, r.end - 1); - if (end == global::not_found) { - return {begin, r.end}; - } - + if (end == global::not_found) return {begin, r.end}; return {begin, access(end) != id_end ? end : end + 1}; } @@ -251,7 +244,7 @@ struct ef_sequence { } uint64_t previous_range_upperbound(const range r) const { - assert(!r.is_invalid()); + assert(r.is_valid()); return r.begin ? 
access(r.begin - 1) : 0;
    }
};
diff --git a/include/fc_dictionary.hpp b/archive/include/fc_dictionary.hpp
similarity index 95%
rename from include/fc_dictionary.hpp
rename to archive/include/fc_dictionary.hpp
index 271f970..52e3971 100644
--- a/include/fc_dictionary.hpp
+++ b/archive/include/fc_dictionary.hpp
@@ -37,14 +37,17 @@ struct fc_dictionary {
         std::string curr;
         std::string header;
+        uint64_t total_characters = 0;
         for (uint32_t b = 0; b != buckets; ++b) {
             input >> header;
+            total_characters += header.size();
             write_header(header);
             m_pointers_to_headers.push_back(m_headers.size());
             prev.swap(header);
             uint32_t size = b != buckets - 1 ? BucketSize : tail;
             for (uint32_t i = 0; i != size; ++i) {
                 input >> curr;
+                total_characters += curr.size();
                 uint32_t l = 0;  // |lcp(curr,prev)|
                 while (l != curr.size() and l != prev.size() and
                        curr[l] == prev[l]) {
@@ -61,6 +64,9 @@ struct fc_dictionary {
             m_buckets.push_back(0);
         }

+        std::cout << static_cast<double>(total_characters) / m_size
+                  << " characters per string" << std::endl;
+
         input.close();
         essentials::logger("DONE");
     }
@@ -109,6 +115,7 @@ struct fc_dictionary {
     fc_dictionary() {}

     // NOTE: return inclusive ranges, i.e., [a,b]
+    // 0-based ids
     range locate_prefix(byte_range p) const {
         if (p.end - p.begin == 0) return {0, size() - 1};
         auto bucket_id = locate_buckets(p);
@@ -223,7 +230,7 @@ struct fc_dictionary {
         if (cmp < 0) {
             bucket_id = mi;
         } else {
-            bucket_id = mi - 1;
+            bucket_id = hi == -1 ? 0 : hi;
             h = header(bucket_id);
         }

@@ -307,10 +314,13 @@ struct fc_dictionary {
        // NOTE 1: excluding null terminators allows us to use memcpy here
        // because we know exactly how many bytes to copy: this is much faster
-        // than looping until we hit '\0'. NOTE 2: always copying a fixed amount
+        // than looping until we hit '\0'.
+
+        // NOTE 2: always copying a fixed amount
        // of bytes (constants::MAX_NUM_CHARS_PER_QUERY) is much faster than
        // copying an exact amount, e.g., suffix_len (although it could be
        // less), so do not do: memcpy(out + l, in, suffix_len).
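+        // Illustrative example (values assumed, not taken from the code):
+        // with lcp length l = 3, suffix_len = 5 and
+        // MAX_NUM_CHARS_PER_QUERY = 128, the fixed-size copy below writes
+        // 128 bytes starting at out + 3 in one straight-line memcpy, but
+        // only bytes [3, 8) are meaningful: the returned length
+        // l + suffix_len = 8 tells the caller to ignore the rest. This
+        // assumes both buffers have MAX_NUM_CHARS_PER_QUERY bytes of
+        // headroom past the copy start, so the over-copy is safe.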
+ memcpy(out + l, in, constants::MAX_NUM_CHARS_PER_QUERY); return l + suffix_len; @@ -340,8 +350,7 @@ struct fc_dictionary { if (cmp < 0) return global::invalid_term_id; curr += l - lcp_len + 2; } - assert(false); - __builtin_unreachable(); + return global::invalid_term_id; // term does not exist in dictionary } id_type left_locate(byte_range p, byte_range h, id_type bucket_id) const { diff --git a/include/integer_codes.hpp b/archive/include/integer_codes.hpp similarity index 100% rename from include/integer_codes.hpp rename to archive/include/integer_codes.hpp diff --git a/include/integer_fc_dictionary.hpp b/archive/include/integer_fc_dictionary.hpp similarity index 89% rename from include/integer_fc_dictionary.hpp rename to archive/include/integer_fc_dictionary.hpp index 218cacf..29d8743 100644 --- a/include/integer_fc_dictionary.hpp +++ b/archive/include/integer_fc_dictionary.hpp @@ -19,7 +19,7 @@ struct integer_fc_dictionary { essentials::logger( "building integer_fc_dictionary with bucket size " + std::to_string(BucketSize) + "..."); - m_doc_ids.reserve(params.num_completions); + m_docid_to_lexid.resize(params.universe, id_type(-1)); uint32_t buckets = std::ceil(double(m_size) / (BucketSize + 1)); m_pointers_to_buckets.reserve(buckets + 1); @@ -35,9 +35,10 @@ struct integer_fc_dictionary { std::ios_base::in); completion_iterator it(params, input); + id_type lex_id = 0; for (uint32_t b = 0; b != buckets; ++b) { auto& header = *it; - m_doc_ids.push_back(header.doc_id); + m_docid_to_lexid[header.doc_id] = lex_id++; write_header(header.completion); m_pointers_to_headers.push_back(m_headers.size()); completion_type prev; @@ -47,7 +48,7 @@ struct integer_fc_dictionary { for (uint32_t i = 0; i != size; ++i, ++it) { auto& record = *it; auto& curr = record.completion; - m_doc_ids.push_back(record.doc_id); + m_docid_to_lexid[record.doc_id] = lex_id++; uint32_t l = 0; // |lcp(curr,prev)| while (l != curr.size() and l != prev.size() and curr[l] == prev[l]) { @@ -76,7 +77,7 @@ struct integer_fc_dictionary { other.m_pointers_to_buckets.swap(m_pointers_to_buckets); other.m_headers.swap(m_headers); other.m_buckets.swap(m_buckets); - other.m_doc_ids.swap(m_doc_ids); + other.m_docid_to_lexid.swap(m_docid_to_lexid); } void build(integer_fc_dictionary& d) { @@ -88,8 +89,8 @@ struct integer_fc_dictionary { builder().swap(*this); } - std::vector& doc_ids() { - return m_doc_ids; + std::vector& docid_to_lexid() { + return m_docid_to_lexid; } private: @@ -98,7 +99,7 @@ struct integer_fc_dictionary { std::vector m_pointers_to_buckets; std::vector m_headers; std::vector m_buckets; - std::vector m_doc_ids; + std::vector m_docid_to_lexid; void write_header(completion_type const& c) { assert(c.size() > 0 and @@ -166,19 +167,20 @@ struct integer_fc_dictionary { prefix.push_back(global::invalid_term_id); } - locate_bucket(completion_to_uint32_range(prefix), h_end, bucket_id_end, - bucket_id_begin // hint + locate_right_bucket(completion_to_uint32_range(prefix), h_end, + bucket_id_end, + bucket_id_begin // hint ); uint32_t p_end = bucket_id_end * (BucketSize + 1); p_end += right_locate(completion_to_uint32_range(prefix), h_end, bucket_id_end); + prefix.pop_back(); + if (p_end < p_begin) { - prefix.pop_back(); return global::invalid_range; } - prefix.pop_back(); if (suffix_lex_range.begin == suffix_lex_range.end) { prefix.pop_back(); } @@ -269,13 +271,37 @@ struct integer_fc_dictionary { if (cmp < 0) { bucket_id = mi; } else { - bucket_id = mi - 1; + bucket_id = hi == -1 ? 
0 : hi; h = header(bucket_id); } return false; } + void locate_right_bucket(uint32_range t, uint32_range& h, + id_type& bucket_id, + int lower_bound_hint = 0) const { + int lo = lower_bound_hint, hi = buckets() - 1, mi = 0, cmp = 0; + size_t n = t.end - t.begin; + while (lo <= hi) { + mi = (lo + hi) / 2; + h = header(mi); + cmp = uint32_range_compare(h, t, n); + if (cmp > 0) { + hi = mi - 1; + } else if (cmp <= 0) { + lo = mi + 1; + } + } + + if (cmp < 0) { + bucket_id = mi; + } else { + bucket_id = hi == -1 ? 0 : hi; + h = header(bucket_id); + } + } + #define INT_FC_DICT_LOCATE_INIT \ static uint32_t decoded[2 * constants::MAX_NUM_TERMS_PER_QUERY]; \ memcpy(decoded, h.begin, (h.end - h.begin) * sizeof(uint32_t)); \ diff --git a/include/inverted_index.hpp b/archive/include/inverted_index.hpp similarity index 88% rename from include/inverted_index.hpp rename to archive/include/inverted_index.hpp index 7c84bd7..900fd96 100644 --- a/include/inverted_index.hpp +++ b/archive/include/inverted_index.hpp @@ -16,7 +16,7 @@ struct inverted_index { builder(parameters const& params) : m_num_integers(0) - , m_num_docs(params.num_completions) { + , m_num_docs(params.universe) { essentials::logger("building inverted_index..."); uint64_t num_terms = params.num_terms; @@ -28,10 +28,18 @@ struct inverted_index { std::vector list; m_pointers.push_back(0); + + uint32_t max_list_size = 0; + uint32_t min_list_size = uint32_t(-1); + for (uint64_t i = 0; i != num_terms; ++i) { list.clear(); uint32_t n = 0; input >> n; + + if (n > max_list_size) max_list_size = n; + if (n < min_list_size) min_list_size = n; + list.reserve(n); m_num_integers += n; for (uint64_t k = 0; k != n; ++k) { @@ -41,11 +49,17 @@ struct inverted_index { } m_minimal_doc_ids.push_back(list.front()); write_gamma_nonzero(m_bvb, n); - if (ListType::is_byte_aligned) util::push_pad(m_bvb); + if constexpr (ListType::is_byte_aligned) util::push_pad(m_bvb); ListType::build(m_bvb, list.begin(), m_num_docs, list.size()); m_pointers.push_back(m_bvb.size()); } + std::cout << "avg. 
list size = " + << static_cast(m_num_integers) / num_terms + << std::endl; + std::cout << "max_list_size = " << max_list_size << std::endl; + std::cout << "min_list_size = " << min_list_size << std::endl; + m_pointers.pop_back(); input.close(); essentials::logger("DONE"); @@ -86,7 +100,7 @@ struct inverted_index { uint64_t offset = m_pointers.access(term_id); bits_iterator it(m_data, offset); uint64_t n = read_gamma_nonzero(it); - if (ListType::is_byte_aligned) util::eat_pad(it); + if constexpr (ListType::is_byte_aligned) util::eat_pad(it); return {m_data, it.position(), m_num_docs, n}; } diff --git a/include/min_heap.hpp b/archive/include/min_heap.hpp similarity index 100% rename from include/min_heap.hpp rename to archive/include/min_heap.hpp diff --git a/archive/include/minimal_docids.hpp b/archive/include/minimal_docids.hpp new file mode 100644 index 0000000..a7cb8f8 --- /dev/null +++ b/archive/include/minimal_docids.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "compact_vector.hpp" +#include "util_types.hpp" + +namespace autocomplete { + +template +struct minimal_docids { + static const uint32_t SCAN_THRESHOLD = 64; + typedef scored_range_with_list_iterator< + typename InvertedIndex::iterator_type> + range_type; + typedef scored_range_with_list_iterator_comparator< + typename range_type::iterator_type> + comparator_range_type; + + minimal_docids() {} + + void build(std::vector const& list) { + essentials::logger("building minimal_docids..."); + m_rmq.build(list, std::less()); + m_list.build(list.begin(), list.size()); + essentials::logger("DONE"); + } + + uint32_t topk(InvertedIndex const& index, const range r, const uint32_t k, + std::vector& topk_scores) { + range_type sr; + sr.r = {r.begin, r.end - 1}; // rmq needs inclusive ranges + sr.min_pos = m_rmq.rmq(sr.r.begin, sr.r.end); + sr.min_val = m_list.access(sr.min_pos); + + m_q.clear(); + m_q.push(sr); + + uint32_t results = 0; + while (!m_q.empty()) { + auto& min = m_q.top(); + auto docid = min.minimum(); + bool alread_present = std::binary_search( + topk_scores.begin(), topk_scores.begin() + results, docid); + if (!alread_present) { + topk_scores[results++] = docid; + if (results == k) break; + } + + if (min.is_open()) { + min.iterator.next(); + if (!min.iterator.has_next()) { + m_q.pop(); + } + m_q.heapify(); + } else { + // save + auto min_range = min.r; + auto min_pos = min.min_pos; + + min.set_iterator(index); + min.iterator.next(); + if (!min.iterator.has_next()) { + m_q.pop(); + } + + m_q.heapify(); + + if (min_pos > 0 and min_pos - 1 >= min_range.begin) { + range_type left; + left.r = {min_range.begin, min_pos - 1}; + if (left.r.end - left.r.begin <= SCAN_THRESHOLD) { + left.min_pos = rmq(left.r.begin, left.r.end); + } else { + left.min_pos = m_rmq.rmq(left.r.begin, left.r.end); + } + left.min_val = m_list.access(left.min_pos); + m_q.push(left); + } + + if (min_pos < size() - 1 and min_range.end >= min_pos + 1) { + range_type right; + right.r = {min_pos + 1, min_range.end}; + if (right.r.end - right.r.begin <= SCAN_THRESHOLD) { + right.min_pos = rmq(right.r.begin, right.r.end); + } else { + right.min_pos = m_rmq.rmq(right.r.begin, right.r.end); + } + right.min_val = m_list.access(right.min_pos); + m_q.push(right); + } + } + } + + return results; + } + + size_t size() const { + return m_list.size(); + } + + size_t bytes() const { + return m_rmq.bytes() + m_list.bytes(); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_rmq); + visitor.visit(m_list); + } + +private: + typedef min_heap 
min_priority_queue_type; + min_priority_queue_type m_q; + + RMQ m_rmq; + compact_vector m_list; + + uint64_t rmq(uint64_t lo, uint64_t hi) { // inclusive endpoints + uint64_t pos = lo; + id_type min = id_type(-1); + for (uint64_t i = lo; i <= hi; ++i) { + id_type val = m_list.access(i); + if (val < min) { + min = val; + pos = i; + } + } + return pos; + } +}; + +} // namespace autocomplete \ No newline at end of file diff --git a/include/parameters.hpp b/archive/include/parameters.hpp similarity index 81% rename from include/parameters.hpp rename to archive/include/parameters.hpp index db44d71..d628d25 100644 --- a/include/parameters.hpp +++ b/archive/include/parameters.hpp @@ -24,10 +24,12 @@ struct parameters { input >> num_terms; input >> max_string_length; input >> num_completions; + input >> universe; input >> num_levels; assert(num_terms > 0); assert(max_string_length > 0); assert(num_completions > 0); + assert(universe >= num_completions); assert(num_levels > 0); if (max_string_length > constants::MAX_NUM_CHARS_PER_QUERY) { @@ -41,14 +43,18 @@ struct parameters { } nodes_per_level.resize(num_levels, 0); - for (uint32_t i = 0; i != num_levels; ++i) { - input >> nodes_per_level[i]; + uint32_t i = 0; + for (; i != num_levels and input; ++i) input >> nodes_per_level[i]; + if (i != num_levels) { + throw std::runtime_error( + "File with statistics may be truncated or malformed"); } } uint32_t num_terms; uint32_t max_string_length; uint32_t num_completions; + uint32_t universe; uint32_t num_levels; std::vector nodes_per_level; std::string collection_basename; diff --git a/archive/include/probe.hpp b/archive/include/probe.hpp new file mode 100644 index 0000000..955a939 --- /dev/null +++ b/archive/include/probe.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include "util_types.hpp" + +namespace autocomplete { + +struct nop_probe { + inline void start(uint64_t) {} + inline void stop(uint64_t) {} +}; + +struct timer_probe { + timer_probe(uint64_t n) + : m_timers(n) {} + + inline void start(uint64_t i) { + assert(i < m_timers.size()); + m_timers[i].start(); + } + + inline void stop(uint64_t i) { + assert(i < m_timers.size()); + m_timers[i].stop(); + } + + timer_type const& get(uint64_t i) { + assert(i < m_timers.size()); + return m_timers[i]; + } + +private: + std::vector m_timers; +}; + +} // namespace autocomplete diff --git a/include/scored_string_pool.hpp b/archive/include/scored_string_pool.hpp similarity index 87% rename from include/scored_string_pool.hpp rename to archive/include/scored_string_pool.hpp index f834453..3f03f06 100644 --- a/include/scored_string_pool.hpp +++ b/archive/include/scored_string_pool.hpp @@ -4,6 +4,11 @@ namespace autocomplete { +struct scored_byte_range { + byte_range string; + id_type score; +}; + struct scored_string_pool { void init() { push_back_offset(0); @@ -39,6 +44,10 @@ struct scored_string_pool { return m_scores; } + std::vector const& const_scores() const { + return m_scores; + } + scored_byte_range operator[](size_t i) const { assert(i < size()); scored_byte_range sbr; @@ -69,6 +78,10 @@ struct scored_string_pool { return m_pool->operator[](m_pos); } + scored_string_pool const* pool() const { + return m_pool; + } + private: scored_string_pool const* m_pool; size_t m_pos; diff --git a/include/statistics.hpp b/archive/include/statistics.hpp similarity index 81% rename from include/statistics.hpp rename to archive/include/statistics.hpp index a863814..42654ae 100644 --- a/include/statistics.hpp +++ b/archive/include/statistics.hpp @@ -10,7 +10,8 @@ 
namespace autocomplete { void print(std::string const& what, size_t bytes, size_t total_bytes, uint64_t num_completions) { - std::cout << " " << what << ": " << convert(bytes, essentials::MiB) + std::cout << " " << what << ": " + << essentials::convert(bytes, essentials::MiB) << " [MiB]: " << static_cast(bytes) / num_completions << " [bytes per completion] "; std::cout << "(" << (bytes * 100.0) / total_bytes << "%)" << std::endl; @@ -31,20 +32,21 @@ template ::print_stats() const { size_t total_bytes = bytes(); - std::cout << "using " << convert(total_bytes, essentials::MiB) << " [MiB]" - << std::endl; + std::cout << "using " << essentials::convert(total_bytes, essentials::MiB) + << " [MiB]" << std::endl; print_bps("nodes", nodes_bytes(), size()); print_bps("pointers", pointers_bytes(), size()); print_bps("left extremes", left_extremes_bytes(), size()); print_bps("sizes", sizes_bytes(), size()); } -template -void autocomplete +void autocomplete::print_stats() const { size_t total_bytes = bytes(); - std::cout << "using " << convert(total_bytes, essentials::MiB) << " [MiB]: " + std::cout << "using " << essentials::convert(total_bytes, essentials::MiB) + << " [MiB]: " << static_cast(total_bytes) / m_completions.size() << " [bytes per completion] " << std::endl; @@ -74,18 +76,22 @@ void autocomplete(m_forward_index.num_integers()) / + m_completions.size() + << std::endl; print_bpi("data", m_forward_index.data_bytes(), m_forward_index.num_integers()); print_bpi("pointers", m_forward_index.pointer_bytes(), m_forward_index.num_integers()); } -template -void autocomplete2::print_stats() const { +template +void autocomplete2::print_stats() + const { size_t total_bytes = bytes(); - std::cout << "using " << convert(total_bytes, essentials::MiB) << " [MiB]: " + std::cout << "using " << essentials::convert(total_bytes, essentials::MiB) + << " [MiB]: " << static_cast(total_bytes) / m_completions.size() << " [bytes per completion] " << std::endl; @@ -115,12 +121,12 @@ void autocomplete2 -void autocomplete3::print_stats() const { +template +void autocomplete3::print_stats() + const { size_t total_bytes = bytes(); - std::cout << "using " << convert(total_bytes, essentials::MiB) << " [MiB]: " + std::cout << "using " << essentials::convert(total_bytes, essentials::MiB) + << " [MiB]: " << static_cast(total_bytes) / m_completions.size() << " [bytes per completion] " << std::endl; @@ -140,12 +146,13 @@ void autocomplete3 -void autocomplete4::print_stats() const { +void autocomplete4::print_stats() + const { size_t total_bytes = bytes(); - std::cout << "using " << convert(total_bytes, essentials::MiB) << " [MiB]: " + std::cout << "using " << essentials::convert(total_bytes, essentials::MiB) + << " [MiB]: " << static_cast(total_bytes) / m_completions.size() << " [bytes per completion] " << std::endl; diff --git a/include/succinct_rmq/README.md b/archive/include/succinct_rmq/README.md similarity index 100% rename from include/succinct_rmq/README.md rename to archive/include/succinct_rmq/README.md diff --git a/include/succinct_rmq/bp_vector.hpp b/archive/include/succinct_rmq/bp_vector.hpp similarity index 100% rename from include/succinct_rmq/bp_vector.hpp rename to archive/include/succinct_rmq/bp_vector.hpp diff --git a/include/succinct_rmq/bp_vector_support.hpp b/archive/include/succinct_rmq/bp_vector_support.hpp similarity index 100% rename from include/succinct_rmq/bp_vector_support.hpp rename to archive/include/succinct_rmq/bp_vector_support.hpp diff --git a/include/succinct_rmq/cartesian_tree.hpp 
b/archive/include/succinct_rmq/cartesian_tree.hpp similarity index 100% rename from include/succinct_rmq/cartesian_tree.hpp rename to archive/include/succinct_rmq/cartesian_tree.hpp diff --git a/include/succinct_rmq/rs_bit_vector.hpp b/archive/include/succinct_rmq/rs_bit_vector.hpp similarity index 100% rename from include/succinct_rmq/rs_bit_vector.hpp rename to archive/include/succinct_rmq/rs_bit_vector.hpp diff --git a/archive/include/types.hpp b/archive/include/types.hpp new file mode 100644 index 0000000..659199d --- /dev/null +++ b/archive/include/types.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include "completion_trie.hpp" +#include "fc_dictionary.hpp" +#include "integer_fc_dictionary.hpp" +#include "compact_forward_index.hpp" +#include "inverted_index.hpp" +#include "blocked_inverted_index.hpp" +#include "autocomplete.hpp" +#include "autocomplete2.hpp" +#include "autocomplete3.hpp" +#include "autocomplete4.hpp" +#include "compact_vector.hpp" +#include "ef/ef_sequence.hpp" +#include "ef/compact_ef.hpp" + +namespace autocomplete { + +typedef uint_vec uint32_vec; +typedef uint_vec uint64_vec; + +typedef completion_trie + ef_completion_trie; +typedef fc_dictionary<> fc_dictionary_type; +typedef integer_fc_dictionary<> integer_fc_dictionary_type; +typedef inverted_index ef_inverted_index; +typedef blocked_inverted_index ef_blocked_inverted_index; + +/* compressed indexes */ +typedef autocomplete + ef_autocomplete_type1; + +typedef autocomplete2 + ef_autocomplete_type2; + +typedef autocomplete3 + ef_autocomplete_type3; + +typedef autocomplete4 + ef_autocomplete_type4; + +} // namespace autocomplete \ No newline at end of file diff --git a/include/uint_vec.hpp b/archive/include/uint_vec.hpp similarity index 94% rename from include/uint_vec.hpp rename to archive/include/uint_vec.hpp index 86d60c4..adeaa8c 100644 --- a/include/uint_vec.hpp +++ b/archive/include/uint_vec.hpp @@ -74,14 +74,14 @@ struct uint_vec { } uint64_t find(const range r, UintType id) const { - assert(!r.is_invalid()); + assert(r.is_valid()); assert(r.end <= size()); - UintType prev_upper = previous_range_upperbound(r); + auto prev_upper = previous_range_upperbound(r); return util::find(*this, id + prev_upper, r.begin, r.end - 1); } range find(const range r, const range lex) const { - assert(!r.is_invalid()); + assert(r.is_valid()); assert(r.end <= size()); auto prev_upper = previous_range_upperbound(r); @@ -131,9 +131,9 @@ struct uint_vec { std::vector m_data; UintType previous_range_upperbound(const range r) const { - assert(!r.is_invalid()); + assert(r.is_valid()); return r.begin ? 
access(r.begin - 1) : 0;
    }
-};  // namespace autocomplete
+};
 }  // namespace autocomplete
\ No newline at end of file
diff --git a/include/uncompressed_list.hpp b/archive/include/uncompressed_list.hpp
similarity index 100%
rename from include/uncompressed_list.hpp
rename to archive/include/uncompressed_list.hpp
diff --git a/include/unsorted_list.hpp b/archive/include/unsorted_list.hpp
similarity index 78%
rename from include/unsorted_list.hpp
rename to archive/include/unsorted_list.hpp
index e7cfddd..bb06a86 100644
--- a/include/unsorted_list.hpp
+++ b/archive/include/unsorted_list.hpp
@@ -1,48 +1,10 @@
 #pragma once

 #include "compact_vector.hpp"
+#include "util_types.hpp"

 namespace autocomplete {

-struct scored_byte_range {
-    byte_range string;
-    id_type score;
-};
-
-typedef std::function<bool(scored_range const&, scored_range const&)>
-    scored_range_comparator_type;
-scored_range_comparator_type scored_range_comparator =
-    [](scored_range const& l, scored_range const& r) {
-        return l.min_val > r.min_val;
-    };
-
-struct topk_queue {
-    void push(scored_range sr) {
-        m_q.push_back(sr);
-        std::push_heap(m_q.begin(), m_q.end(), scored_range_comparator);
-    }
-
-    scored_range top() {
-        return m_q.front();
-    }
-
-    void pop() {
-        std::pop_heap(m_q.begin(), m_q.end(), scored_range_comparator);
-        m_q.pop_back();
-    }
-
-    void clear() {
-        m_q.clear();
-    }
-
-    bool empty() const {
-        return m_q.empty();
-    }
-
-private:
-    std::vector<scored_range> m_q;
-};
-
 template <typename RMQ>
 struct unsorted_list {
     static const uint32_t SCAN_THRESHOLD = 64;
@@ -132,6 +94,40 @@ struct unsorted_list {
     }

 private:
+    struct topk_queue {
+        void push(scored_range sr) {
+            m_q.push_back(sr);
+            std::push_heap(m_q.begin(), m_q.end(), m_comparator);
+        }
+
+        scored_range top() {
+            return m_q.front();
+        }
+
+        void pop() {
+            std::pop_heap(m_q.begin(), m_q.end(), m_comparator);
+            m_q.pop_back();
+        }
+
+        void clear() {
+            m_q.clear();
+        }
+
+        bool empty() const {
+            return m_q.empty();
+        }
+
+    private:
+        std::vector<scored_range> m_q;
+
+        typedef std::function<bool(scored_range const&, scored_range const&)>
+            scored_range_comparator_type;
+        scored_range_comparator_type m_comparator = [](scored_range const& l,
+                                                       scored_range const& r) {
+            return scored_range::greater(l, r);
+        };
+    };
+
     topk_queue m_q;
     RMQ m_rmq;
     compact_vector m_list;
diff --git a/include/util.hpp b/archive/include/util.hpp
similarity index 99%
rename from include/util.hpp
rename to archive/include/util.hpp
index bb20bdb..4f0b89e 100644
--- a/include/util.hpp
+++ b/archive/include/util.hpp
@@ -51,6 +51,7 @@ uint64_t find(S const& sequence, uint64_t id, uint64_t lo, uint64_t hi) {
         if (val == id) {
             return pos;
         } else if (val > id) {
+            if (pos == 0) return global::not_found;
             hi = pos - 1;
         } else {
             lo = pos + 1;
diff --git a/include/util_types.hpp b/archive/include/util_types.hpp
similarity index 81%
rename from include/util_types.hpp
rename to archive/include/util_types.hpp
index 7405378..0890002 100644
--- a/include/util_types.hpp
+++ b/archive/include/util_types.hpp
@@ -36,6 +36,7 @@ struct range {
     uint64_t begin;
     uint64_t end;
     bool is_invalid() const;
+    bool is_valid() const;
     bool contains(uint64_t val) const;
 };

@@ -48,6 +49,10 @@ bool range::is_invalid() const {
            end == global::invalid_range.end or begin > end;
 }

+bool range::is_valid() const {
+    return !is_invalid();
+}
+
 bool range::contains(uint64_t val) const {
     if (val >= begin and val <= end) return true;
     return false;
 }

@@ -57,6 +62,55 @@ struct scored_range {
     range r;
     uint32_t min_pos;
     id_type min_val;
+
+    static bool greater(scored_range const& l, scored_range const& r) {
+        return l.min_val > r.min_val;
+    }
+};
+
+template <typename Iterator>
+struct
scored_range_with_list_iterator { + typedef Iterator iterator_type; + + scored_range_with_list_iterator() + : min_pos(global::invalid_term_id) + , m_open(false) {} + + range r; + uint32_t min_pos; + id_type min_val; + Iterator iterator; + + bool is_open() const { + return m_open; + } + + template + void set_iterator(InvertedIndex const& index) { + assert(min_pos != global::invalid_term_id); + m_open = true; + iterator = index.iterator(min_pos); + } + + id_type minimum() const { + return is_open() ? *iterator : min_val; + } + + // static bool greater(scored_range_with_list_iterator const& l, + // scored_range_with_list_iterator const& r) { + // return l.minimum() > r.minimum(); + // } + +private: + bool m_open; +}; + +template +struct scored_range_with_list_iterator_comparator { + bool operator()(scored_range_with_list_iterator const& l, + scored_range_with_list_iterator const& r) { + return l.minimum() > r.minimum(); + } }; struct byte_range { @@ -237,25 +291,4 @@ struct timer { typedef timer timer_type; -struct iterator { - iterator(id_type begin, id_type end) - : m_begin(begin) - , m_end(end) {} - - bool has_next() const { - return m_begin < m_end; - } - - id_type operator*() const { - return m_begin; - } - - void operator++() { - ++m_begin; - } - -private: - id_type m_begin, m_end; -}; - } // namespace autocomplete diff --git a/archive/install.sh b/archive/install.sh new file mode 100644 index 0000000..7714147 --- /dev/null +++ b/archive/install.sh @@ -0,0 +1,11 @@ +git submodule init +git submodule update +mkdir -p build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -DUSE_SANITIZERS=Off -DUSE_INTRINSICS=On -DUSE_PDEP=On +make +cd ../test_data +bash preprocess.sh trec_05_efficiency_queries/trec_05_efficiency_queries.completions 300 +cd ../build +make test +cd .. diff --git a/archive/script/benchmark_dictionaries.sh b/archive/script/benchmark_dictionaries.sh new file mode 100644 index 0000000..29c9a84 --- /dev/null +++ b/archive/script/benchmark_dictionaries.sh @@ -0,0 +1,7 @@ +cd ../test_data +bash preprocess.sh aol/aol.completions 100000 +cd ../build +python ../script/collect_locate_prefix_results_by_varying_percentage.py fc ../test_data/aol/aol.completions 100000 +python ../script/collect_locate_prefix_results_by_varying_percentage.py trie ../test_data/aol/aol.completions 100000 +./benchmark_fc_dictionary ../test_data/aol/aol.completions 100000 < ../test_data/aol/aol.completions.queries/queries.length=1 > ../test_data/aol/aol.completions.dictionary_benchmark.txt +cd ../script \ No newline at end of file diff --git a/archive/script/build_indexes.py b/archive/script/build_indexes.py new file mode 100644 index 0000000..e01e1db --- /dev/null +++ b/archive/script/build_indexes.py @@ -0,0 +1,6 @@ +import sys, os + +dataset_name = sys.argv[1] # e.g., aol +types = ["ef_type1", "ef_type2", "ef_type3", "ef_type4"] +for t in types: + os.system("./build " + t + " ../test_data/" + dataset_name + "/" + dataset_name + ".completions -o " + t + "." 
+ dataset_name + ".bin -c 0.0001") \ No newline at end of file diff --git a/archive/script/collect_effectiveness_results_by_varying_percentage.py b/archive/script/collect_effectiveness_results_by_varying_percentage.py new file mode 100644 index 0000000..2693e70 --- /dev/null +++ b/archive/script/collect_effectiveness_results_by_varying_percentage.py @@ -0,0 +1,17 @@ +import sys, os + +index_type = sys.argv[1] +index_filename = sys.argv[2] +collection_basename = sys.argv[3] # e.g., aol/aol.completions or aol/aol.completions.filtered +k = sys.argv[4] +num_queries = sys.argv[5] + +output_filename = collection_basename + "." + index_type +output_filename += ".effectiveness.json" +query_filename_prefix = collection_basename + ".queries/queries." + +percentages = ["0.0", "0.25", "0.50", "0.75"] +for perc in percentages: + for terms in range(1,7): + os.system("../build/effectiveness " + index_type + " " + k + " ../build/" + index_filename + " " + str(terms) + " " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=" + str(terms) + " 2>> " + output_filename) + os.system("../build/effectiveness " + index_type + " " + k + " ../build/" + index_filename + " 7+ " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=7+ 2>> " + output_filename) diff --git a/archive/script/collect_locate_prefix_results_by_varying_percentage.py b/archive/script/collect_locate_prefix_results_by_varying_percentage.py new file mode 100644 index 0000000..305fafa --- /dev/null +++ b/archive/script/collect_locate_prefix_results_by_varying_percentage.py @@ -0,0 +1,14 @@ +import sys, os + +type = sys.argv[1] # 'trie' or 'fc' +collection_basename = sys.argv[2] +num_queries = sys.argv[3] + +output_filename = collection_basename + "." + type + ".locate_prefix.json" +query_filename_prefix = collection_basename + ".queries/queries." + +percentages = ["0.0", "0.25", "0.50", "0.75"] +for perc in percentages: + for terms in range(1,8): + os.system("../build/benchmark_locate_prefix " + type + " " + collection_basename + " " + str(terms) + " " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=" + str(terms) + " 2>> " + output_filename) + os.system("../build/benchmark_locate_prefix " + type + " " + collection_basename + " 8+ " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=8+ 2>> " + output_filename) diff --git a/archive/script/collect_results_by_varying_percentage.py b/archive/script/collect_results_by_varying_percentage.py new file mode 100644 index 0000000..c639032 --- /dev/null +++ b/archive/script/collect_results_by_varying_percentage.py @@ -0,0 +1,18 @@ +import sys, os + +index_type = sys.argv[1] +query_mode = sys.argv[2] # topk, prefix_topk, conjunctive_topk +index_filename = sys.argv[3] +collection_basename = sys.argv[4] # e.g., aol/aol.completions or aol/aol.completions.filtered +k = sys.argv[5] +num_queries = sys.argv[6] + +output_filename = collection_basename + "." + index_type +output_filename += "." + query_mode + ".json" +query_filename_prefix = collection_basename + ".queries/queries." 
+ +percentages = ["0.0", "0.25", "0.50", "0.75"] +for perc in percentages: + for terms in range(1,7): + os.system("../build/benchmark_" + query_mode + " " + index_type + " " + k + " ../build/" + index_filename + " " + str(terms) + " " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=" + str(terms) + " 2>> " + output_filename) + os.system("../build/benchmark_" + query_mode + " " + index_type + " " + k + " ../build/" + index_filename + " 7+ " + str(num_queries) + " " + perc + " < " + query_filename_prefix + "length=7+ 2>> " + output_filename) diff --git a/src/CMakeLists.txt b/archive/src/CMakeLists.txt similarity index 70% rename from src/CMakeLists.txt rename to archive/src/CMakeLists.txt index 7b000b1..1c5a82d 100644 --- a/src/CMakeLists.txt +++ b/archive/src/CMakeLists.txt @@ -2,3 +2,5 @@ add_executable(build build.cpp) add_executable(web_server web_server.cpp ../external/mongoose/mongoose.c) add_executable(output_ds2i_format output_ds2i_format.cpp) add_executable(statistics statistics.cpp) +# add_executable(check_topk check_topk.cpp) +add_executable(map_queries map_queries.cpp) \ No newline at end of file diff --git a/src/build.cpp b/archive/src/build.cpp similarity index 52% rename from src/build.cpp rename to archive/src/build.cpp index 732318f..ba73954 100644 --- a/src/build.cpp +++ b/archive/src/build.cpp @@ -2,57 +2,48 @@ #include "types.hpp" #include "statistics.hpp" +#include "../external/cmd_line_parser/include/parser.hpp" using namespace autocomplete; template <typename Index> -void build(parameters const& params, char const* output_filename) { +void build(parameters const& params, std::string const& output_filename) { Index index(params); index.print_stats(); - if (output_filename) { + if (output_filename != "") { essentials::logger("saving data structure to disk..."); - essentials::save(index, output_filename); + essentials::save(index, output_filename.c_str()); essentials::logger("DONE"); } } void build_type4(parameters const& params, const float c, - char const* output_filename) { + std::string const& output_filename) { ef_autocomplete_type4 index(params, c); index.print_stats(); - if (output_filename) { + if (output_filename != "") { essentials::logger("saving data structure to disk..."); - essentials::save(index, output_filename); + essentials::save(index, output_filename.c_str()); essentials::logger("DONE"); } } int main(int argc, char** argv) { - int mandatory = 2; - if (argc < mandatory + 1) { - std::cout << argv[0] - << " <type> <collection_basename> [-o output_filename] [-c c]" - << std::endl; - return 1; - } - - std::string type(argv[1]); + cmd_line_parser::parser parser(argc, argv); + parser.add("type", "Index type."); + parser.add("collection_basename", "Collection basename."); + parser.add("output_filename", "Output filename.", "-o", false); + parser.add( + "c", + "Value for Bast and Weber's technique: c must be a float in (0,1].", + "-c", false); + if (!parser.parse()) return 1; + + auto type = parser.get<std::string>("type"); parameters params; - params.collection_basename = argv[2]; + params.collection_basename = parser.get<std::string>("collection_basename"); params.load(); - - char const* output_filename = nullptr; - float c = 0.0; - - for (int i = mandatory; i != argc; ++i) { - if (std::string(argv[i]) == "-o") { - ++i; - output_filename = argv[i]; - } else if (std::string(argv[i]) == "-c") { - ++i; - c = std::stof(argv[i]); - } - } + auto output_filename = parser.get<std::string>("output_filename"); if (type == "ef_type1") { build<ef_autocomplete_type1>(params, output_filename); @@ -61,10 +52,7 @@ int main(int argc, char** argv) { } else
if (type == "ef_type3") { build<ef_autocomplete_type3>(params, output_filename); } else if (type == "ef_type4") { - if (c == 0.0) { - std::cerr << "c must be greater than 0.0" << std::endl; - return 1; - } + auto c = parser.get<float>("c"); build_type4(params, c, output_filename); } else { return 1; diff --git a/archive/src/check_topk.cpp b/archive/src/check_topk.cpp new file mode 100644 index 0000000..cb466a1 --- /dev/null +++ b/archive/src/check_topk.cpp @@ -0,0 +1,64 @@ +#include <iostream> + +#include "types.hpp" +#include "../benchmark/benchmark_common.hpp" + +using namespace autocomplete; + +template <typename Index> +void check_topk(char const* binary_filename1, char const* binary_filename2, + uint32_t k, uint32_t max_num_queries, float keep) { + Index index1; + ef_autocomplete_type1 index2; + essentials::load(index1, binary_filename1); + essentials::load(index2, binary_filename2); + std::vector<std::string> queries; + load_queries(queries, max_num_queries, keep, std::cin); + for (auto const& query : queries) { + size_t n1 = index1.topk(query, k).size(); + size_t n2 = index2.topk(query, k).size(); + if (n1 != n2) { + std::cout << query << std::endl; + } + } +} + +int main(int argc, char** argv) { + int mandatory = 6; + if (argc < mandatory + 1) { + std::cout << argv[0] + << " <type> <k> <binary_filename1> <binary_filename2>" + " <max_num_queries> <keep>" + " < queries" + << std::endl; + std::cout << " <keep> is a float in [0,1] and specifies how much " + "we keep of the last token in a query " + << std::endl; + return 1; + } + + std::string type(argv[1]); + uint32_t k = std::atoi(argv[2]); + char const* binary_filename1 = argv[3]; + char const* binary_filename2 = argv[4]; + uint32_t max_num_queries = std::atoi(argv[5]); + float keep = std::atof(argv[6]); + + if (type == "ef_type1") { + check_topk<ef_autocomplete_type1>(binary_filename1, binary_filename2, k, + max_num_queries, keep); + } else if (type == "ef_type2") { + check_topk<ef_autocomplete_type2>(binary_filename1, binary_filename2, k, + max_num_queries, keep); + } else if (type == "ef_type3") { + check_topk<ef_autocomplete_type3>(binary_filename1, binary_filename2, k, + max_num_queries, keep); + } else if (type == "ef_type4") { + check_topk<ef_autocomplete_type4>(binary_filename1, binary_filename2, k, + max_num_queries, keep); + } else { + return 1; + } + + return 0; +} \ No newline at end of file diff --git a/archive/src/map_queries.cpp b/archive/src/map_queries.cpp new file mode 100644 index 0000000..de43df1 --- /dev/null +++ b/archive/src/map_queries.cpp @@ -0,0 +1,53 @@ +#include <iostream> + +#include "types.hpp" + +using namespace autocomplete; + +template <typename Dictionary> +completion_type parse(Dictionary const& dict, std::string const& query) { + completion_type completion; + byte_range_iterator it(string_to_byte_range(query)); + while (it.has_next()) { + byte_range term = it.next(); + auto term_id = dict.locate(term); + assert(term_id > 0); + assert(term_id != global::invalid_term_id); + completion.push_back(term_id - 1); + } + return completion; +} + +int main(int argc, char** argv) { + int mandatory = 2 + 1; + if (argc < mandatory) { + std::cout << argv[0] << " <collection_basename> <num_queries> < queries" + << std::endl; + return 1; + } + + parameters params; + params.collection_basename = argv[1]; + params.load(); + + uint32_t num_queries = std::atoi(argv[2]); + + fc_dictionary_type dict; + { + fc_dictionary_type::builder builder(params); + builder.build(dict); + } + + std::string query; + for (uint32_t i = 0; i != num_queries; ++i) { + if (!std::getline(std::cin, query)) break; + auto completion = parse(dict, query); + std::cerr << completion.front(); + for (size_t i = 1; i != completion.size(); ++i) { + std::cerr << "\t" << completion[i]; + } + std::cerr << "\n"; + } + + return 0; +} \ No newline at end of file
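
The one-line addition to `find()` in the `util.hpp` hunk above guards against unsigned underflow: when `pos == 0`, `hi = pos - 1` wraps around to 2^64 - 1 and the binary search walks off the end of the sequence instead of reporting a miss. Below is a self-contained sketch of the repaired loop; the scaffolding (including the stand-in `not_found` constant and the `main` driver) is hypothetical, and only the guard line mirrors the actual patch:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for global::not_found in the real code base.
static const uint64_t not_found = uint64_t(-1);

// Binary search over a sorted sequence, shaped like util.hpp's find().
uint64_t find(std::vector<uint64_t> const& sequence, uint64_t id,
              uint64_t lo, uint64_t hi) {
    while (lo <= hi) {
        uint64_t pos = lo + (hi - lo) / 2;
        uint64_t val = sequence[pos];
        if (val == id) {
            return pos;
        } else if (val > id) {
            // Without this guard, pos == 0 makes hi wrap to 2^64 - 1.
            if (pos == 0) return not_found;
            hi = pos - 1;
        } else {
            lo = pos + 1;
        }
    }
    return not_found;
}

int main() {
    std::vector<uint64_t> v = {2, 4, 8};
    assert(find(v, 8, 0, v.size() - 1) == 2);
    assert(find(v, 1, 0, v.size() - 1) == not_found);  // triggers the guard
    return 0;
}
```
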
diff --git a/src/output_ds2i_format.cpp b/archive/src/output_ds2i_format.cpp similarity index 97% rename from src/output_ds2i_format.cpp rename to archive/src/output_ds2i_format.cpp index cc139c4..eb92509 100644 --- a/src/output_ds2i_format.cpp +++ b/archive/src/output_ds2i_format.cpp @@ -27,7 +27,7 @@ int main(int argc, char** argv) { { // write ds2i header uint32_t n = 1; - uint32_t universe = params.num_completions; + uint32_t universe = params.universe; docs.write(reinterpret_cast<char const*>(&n), sizeof(uint32_t)); docs.write(reinterpret_cast<char const*>(&universe), sizeof(uint32_t)); } diff --git a/src/statistics.cpp b/archive/src/statistics.cpp similarity index 58% rename from src/statistics.cpp rename to archive/src/statistics.cpp index 5b2148f..9dbf689 100644 --- a/src/statistics.cpp +++ b/archive/src/statistics.cpp @@ -2,25 +2,25 @@ #include "types.hpp" #include "statistics.hpp" +#include "../external/cmd_line_parser/include/parser.hpp" using namespace autocomplete; template <typename Index> -void print_stats(char const* index_filename) { +void print_stats(std::string const& index_filename) { Index index; - essentials::load(index, index_filename); + essentials::load(index, index_filename.c_str()); index.print_stats(); } int main(int argc, char** argv) { - int mandatory = 2; - if (argc < mandatory + 1) { - std::cout << argv[0] << " <type> <index_filename>" << std::endl; - return 1; - } + cmd_line_parser::parser parser(argc, argv); + parser.add("type", "Index type."); + parser.add("index_filename", "Index filename."); + if (!parser.parse()) return 1; - std::string type(argv[1]); - char const* index_filename = argv[2]; + auto type = parser.get<std::string>("type"); + auto index_filename = parser.get<std::string>("index_filename"); if (type == "ef_type1") { print_stats<ef_autocomplete_type1>(index_filename); diff --git a/src/web_server.cpp b/archive/src/web_server.cpp similarity index 92% rename from src/web_server.cpp rename to archive/src/web_server.cpp index 94a259b..db317fa 100644 --- a/src/web_server.cpp +++ b/archive/src/web_server.cpp @@ -5,6 +5,7 @@ #include "constants.hpp" #include "types.hpp" +#include "probe.hpp" #include "../external/mongoose/mongoose.h" @@ -26,7 +27,7 @@ std::string escape_json(std::string const& s) { using namespace autocomplete; -typedef ef_autocomplete_type3 topk_index_type; +typedef ef_autocomplete_type1 topk_index_type; static std::string s_http_port("8000"); static struct mg_serve_http_opts s_http_server_opts; @@ -53,9 +54,10 @@ static void ev_handler(struct mg_connection* nc, int ev, void* p) { } std::string data; - auto it = topk_index.topk(query, k); - // auto it = topk_index.prefix_topk(query, k); - // auto it = topk_index.conjunctive_topk(query, k); + nop_probe probe; + // auto it = topk_index.topk(query, k, probe); + // auto it = topk_index.prefix_topk(query, k, probe); + auto it = topk_index.conjunctive_topk(query, k, probe); if (it.empty()) { data = "{\"suggestions\":[\"value\":\"\",\"data\":\"\"]}\n"; } else { diff --git a/archive/test/test_autocomplete.cpp b/archive/test/test_autocomplete.cpp new file mode 100644 index 0000000..8fe49cc --- /dev/null +++ b/archive/test/test_autocomplete.cpp @@ -0,0 +1,83 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +typedef ef_autocomplete_type1 index_type; + +TEST_CASE("test autocomplete topk functions") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + index_type index(params); + essentials::save(index, output_filename); + } + + { + index_type index; + 
essentials::load(index, output_filename); + + { + essentials::logger("testing prefix_topk()..."); + uint32_t k = 7; + std::vector queries = { + "a", "10", "african", + "air", "commercial", "internet", + "paris", "somerset", "the", + "the new", "the perfect", "the starting line", + "yu gi oh", "for sale", "dave mat", + "florence", "florida be", "for s", + "for sa", "for sal", "for sale", + "ford a", "ford au", "ford m", + "ford mu", "for", "fo", + "f", "matt", "fl", + "florir", "fly", "the starting l", + "floridaaa"}; + + nop_probe probe; + for (auto& query : queries) { + auto it = index.prefix_topk(query, k, probe); + std::cout << "top-" << it.size() << " completions for '" + << query << "':\n"; + for (uint32_t i = 0; i != it.size(); ++i, ++it) { + auto completion = *it; + std::cout << "(" << completion.score << ", '"; + print(completion.string); + std::cout << "')" << std::endl; + } + } + + essentials::logger("DONE"); + } + + { + essentials::logger("testing conjunctive_topk()..."); + uint32_t k = 7; + std::vector queries = { + "dave mat", "florence", "florida be", "for s", + "for sa", "for sal", "for sale", "ford a", + "ford au", "ford m", "ford mu", "for", + "fo", "f", "matt", "fl", + "flor", "fly", "the starting l"}; + + nop_probe probe; + for (auto& query : queries) { + auto it = index.conjunctive_topk(query, k, probe); + std::cout << "top-" << it.size() << " completions for '" + << query << "':\n"; + for (uint32_t i = 0; i != it.size(); ++i, ++it) { + auto completion = *it; + std::cout << "(" << completion.score << ", '"; + print(completion.string); + std::cout << "')" << std::endl; + } + } + + essentials::logger("DONE"); + } + } + + std::remove(output_filename); +} diff --git a/archive/test/test_blocked_inverted_index.cpp b/archive/test/test_blocked_inverted_index.cpp new file mode 100644 index 0000000..a2ede74 --- /dev/null +++ b/archive/test/test_blocked_inverted_index.cpp @@ -0,0 +1,63 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +typedef ef_blocked_inverted_index blocked_inverted_index_type; +typedef ef_inverted_index inverted_index_type; + +TEST_CASE("test blocked_inverted_index::intersection_iterator") { + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + inverted_index_type ii; + + { + inverted_index_type::builder ii_builder(params); + ii_builder.build(ii); + REQUIRE(ii.num_docs() == params.universe); + REQUIRE(ii.num_terms() == params.num_terms); + } + + { + static const uint32_t num_queries = 10000; + static const uint32_t max_num_terms = 3; + auto queries = testing::gen_random_queries(num_queries, max_num_terms, + params.num_terms); + + static const std::vector C = {0.0125, 0.025, 0.05, 0.1}; + blocked_inverted_index_type blocked_ii; + uint64_t total; + + for (auto c : C) { + total = 0; + { + blocked_inverted_index_type::builder blocked_ii_builder(params, + c); + blocked_ii_builder.build(blocked_ii); + } + + REQUIRE(blocked_ii.num_docs() == params.universe); + REQUIRE(blocked_ii.num_terms() == params.num_terms); + + for (auto& q : queries) { + auto ii_it = ii.intersection_iterator(q); + auto blocked_ii_it = + blocked_ii.intersection_iterator(q, {0, 0}); + + uint32_t n = 0; + for (; ii_it.has_next(); ++n, ++ii_it, ++blocked_ii_it) { + auto got = *blocked_ii_it; + auto expected = *ii_it; + REQUIRE_MESSAGE(got == expected, "expected doc_id " + << expected + << " but got " << got); + } + if (n) total += n; + REQUIRE(blocked_ii_it.has_next() == false); + } + + std::cout << total << std::endl; + } + } +} 
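
The test files above, and the ones that follow, all share the same round-trip discipline: build the structure from `parameters`, persist it with `essentials::save`, reload it with `essentials::load`, re-check its invariants, and remove the temporary file. A minimal doctest sketch of that pattern is below; the `toy_index` type is a hypothetical stand-in for the real index types, and the two include paths are assumptions that mirror the submodule layout used by `test_common.hpp`, not code from the repository:

```cpp
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "../external/doctest/doctest/doctest.h"
#include "../external/essentials/include/essentials.hpp"

#include <cstdio>
#include <cstdint>
#include <vector>

// Hypothetical structure standing in for the real indexes; visit() is
// the hook the essentials library uses for (de)serialization.
struct toy_index {
    std::vector<uint64_t> data;
    uint64_t size() const { return data.size(); }
    template <typename Visitor>
    void visit(Visitor& visitor) {
        visitor.visit(data);
    }
};

TEST_CASE("save/load round-trip") {
    char const* tmp = "tmp.bin";
    {
        toy_index index;
        index.data = {1, 2, 3};
        essentials::save(index, tmp);  // build, then persist to disk
    }
    {
        toy_index index;
        essentials::load(index, tmp);  // reload from disk
        REQUIRE(index.size() == 3);    // invariants must survive the trip
    }
    std::remove(tmp);                  // clean up, as every test here does
}
```
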
diff --git a/archive/test/test_common.hpp b/archive/test/test_common.hpp new file mode 100644 index 0000000..c17283f --- /dev/null +++ b/archive/test/test_common.hpp @@ -0,0 +1,88 @@ +#pragma once + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "../external/doctest/doctest/doctest.h" + +#include + +#include "types.hpp" +#include "probe.hpp" +#include "../benchmark/benchmark_common.hpp" + +namespace autocomplete { +namespace testing { + +static std::string test_filename( + "../test_data/trec_05_efficiency_queries/" + "trec_05_efficiency_queries.completions"); + +static std::string tmp_filename("tmp.bin"); + +id_type locate(std::vector const& terms, std::string const& t) { + return std::distance(terms.begin(), + std::lower_bound(terms.begin(), terms.end(), t)) + + 1; +} + +range locate_prefix(std::vector const& strings, + std::string const& p) { + auto comp_l = [](std::string const& l, std::string const& r) { + if (l.size() < r.size()) { + return strncmp(l.c_str(), r.c_str(), l.size()) <= 0; + } + return strcmp(l.c_str(), r.c_str()) < 0; + }; + + auto comp_r = [](std::string const& l, std::string const& r) { + if (l.size() < r.size()) { + return strncmp(l.c_str(), r.c_str(), l.size()) < 0; + } + return strcmp(l.c_str(), r.c_str()) < 0; + }; + + range r; + r.begin = std::distance( + strings.begin(), + std::lower_bound(strings.begin(), strings.end(), p, comp_l)); + r.end = std::distance( + strings.begin(), + std::upper_bound(strings.begin(), strings.end(), p, comp_r)); + + return r; +} + +typedef std::vector term_ids; + +std::vector gen_random_queries(uint32_t num_queries, + uint32_t max_num_terms, + uint32_t max_range_len) { + assert(max_num_terms > 1); + std::vector queries; + queries.reserve(num_queries); + essentials::uniform_int_rng random_num_terms(2, max_num_terms); + essentials::uniform_int_rng random_term_id(1, max_range_len); + + for (uint32_t i = 0; i != num_queries; ++i) { + term_ids q; + uint32_t num_terms = random_num_terms.gen(); + q.reserve(num_terms); + uint32_t num_distinct_terms = 0; + while (true) { + q.clear(); + for (uint32_t i = 0; i != num_terms; ++i) { + q.push_back(random_term_id.gen()); + } + std::sort(q.begin(), q.end()); + auto end = std::unique(q.begin(), q.end()); + num_distinct_terms = std::distance(q.begin(), end); + if (num_distinct_terms >= 2) break; + } + q.resize(num_distinct_terms); + queries.push_back(q); + } + + return queries; +} + +} // namespace testing +} // namespace autocomplete \ No newline at end of file diff --git a/archive/test/test_compact_forward_index.cpp b/archive/test/test_compact_forward_index.cpp new file mode 100644 index 0000000..dc78c07 --- /dev/null +++ b/archive/test/test_compact_forward_index.cpp @@ -0,0 +1,47 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +TEST_CASE("test compact_forward_index::iterator") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + compact_forward_index::builder builder(params); + compact_forward_index index; + builder.build(index); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + essentials::save(index, output_filename); + } + + { + compact_forward_index index; + essentials::load(index, output_filename); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + + std::ifstream input((params.collection_basename + ".forward").c_str(), + std::ios_base::in); + for 
(uint64_t i = 0; i != index.num_terms(); ++i) { + auto it = index.iterator(i); + uint32_t n = 0; + input >> n; + REQUIRE_MESSAGE(n == it.size(), "list has size " << it.size() + << " instead of " + << n); + for (uint64_t k = 0; k != n; ++k, ++it) { + id_type expected; + input >> expected; + auto got = *it; + REQUIRE_MESSAGE(got == expected, + "got " << got << " but expected " << expected); + } + } + input.close(); + + std::remove(output_filename); + } +}; diff --git a/archive/test/test_completion_trie.cpp b/archive/test/test_completion_trie.cpp new file mode 100644 index 0000000..c5155e1 --- /dev/null +++ b/archive/test/test_completion_trie.cpp @@ -0,0 +1,37 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +typedef ef_completion_trie completion_trie_type; + +TEST_CASE("test completion_trie::is_member()") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + completion_trie_type::builder builder(params); + completion_trie_type ct; + builder.build(ct); + REQUIRE(ct.size() == params.num_completions); + essentials::save(ct, output_filename); + } + + { + completion_trie_type ct; + essentials::load(ct, output_filename); + REQUIRE(ct.size() == params.num_completions); + std::ifstream input(params.collection_basename + ".mapped", + std::ios_base::in); + INFO("testing is_member()"); + completion_iterator it(params, input); + while (input) { + auto& record = *it; + REQUIRE(ct.is_member(record.completion)); + ++it; + } + input.close(); + std::remove(output_filename); + } +} diff --git a/archive/test/test_fc_dictionary.cpp b/archive/test/test_fc_dictionary.cpp new file mode 100644 index 0000000..50d12b0 --- /dev/null +++ b/archive/test/test_fc_dictionary.cpp @@ -0,0 +1,86 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +TEST_CASE("test fc_dictionary") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + fc_dictionary_type::builder builder(params); + fc_dictionary_type dict; + builder.build(dict); + essentials::save(dict, output_filename); + } + + { + fc_dictionary_type dict; + essentials::load(dict, output_filename); + + // test locate() and extract for all strings + std::vector terms; + terms.reserve(params.num_terms); + std::ifstream input((params.collection_basename + ".dict").c_str(), + std::ios_base::in); + if (!input.good()) { + throw std::runtime_error("File not found"); + } + std::string term; + term.reserve(256 + 1); + input >> term; + while (input) { + terms.push_back(std::move(term)); + input >> term; + } + input.close(); + + std::vector decoded(2 * constants::MAX_NUM_CHARS_PER_QUERY); + + for (auto const& t : terms) { + id_type expected = testing::locate(terms, t); + id_type got = dict.locate(string_to_byte_range(t)); + + REQUIRE_MESSAGE(got == expected, "expected id " << expected + << ", but got id " + << got); + + uint8_t string_len = dict.extract(got, decoded.data()); + REQUIRE_MESSAGE(string_len == t.size(), + "expected size " << t.size() << ", but got size " + << string_len); + + auto s = reinterpret_cast(decoded.data()); + for (uint8_t i = 0; i != string_len; ++i) { + REQUIRE_MESSAGE(t[i] == s[i], "expected char " << t[i] + << " but got " + << s[i]); + } + } + + // test locate_prefix() for all strings + std::string prefix; + prefix.reserve(256 + 1); + for (auto const& t : terms) { + uint32_t n = t.size(); + for 
(uint32_t prefix_len = 1; prefix_len <= n; ++prefix_len) { + prefix.clear(); + for (uint32_t i = 0; i != prefix_len; ++i) { + prefix.push_back(t[i]); + } + + range expected = testing::locate_prefix(terms, prefix); + range got = dict.locate_prefix(string_to_byte_range(prefix)); + REQUIRE_MESSAGE((got.begin == expected.begin and + got.end == expected.end - 1), + "Error for prefix '" + << prefix << "' : expected [" + << expected.begin << "," << expected.end - 1 + << "] but got [" << got.begin << "," + << got.end << "]"); + } + } + std::remove(output_filename); + } +} diff --git a/archive/test/test_integer_fc_dictionary.cpp b/archive/test/test_integer_fc_dictionary.cpp new file mode 100644 index 0000000..d36db82 --- /dev/null +++ b/archive/test/test_integer_fc_dictionary.cpp @@ -0,0 +1,63 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +TEST_CASE("test integer_fc_dictionary") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + integer_fc_dictionary_type::builder builder(params); + integer_fc_dictionary_type dict; + builder.build(dict); + essentials::save(dict, output_filename); + } + + { + integer_fc_dictionary_type dict; + essentials::load(dict, output_filename); + + { + std::ifstream input( + (params.collection_basename + ".mapped").c_str(), + std::ios_base::in); + completion_iterator it(params, input); + + completion_type decoded(2 * constants::MAX_NUM_TERMS_PER_QUERY); + for (id_type id = 0; id != params.num_completions; ++id, ++it) { + auto const& expected = (*it).completion; + REQUIRE(expected.size() > 0); + uint8_t size = dict.extract(id, decoded); + + REQUIRE_MESSAGE(expected.size() - 1 == size, + "Error in decoding the " + << id << "-th string: expected size " + << expected.size() - 1 << "," + << " but got size " << int(size)); + + for (uint8_t i = 0; i != size; ++i) { + REQUIRE_MESSAGE(decoded[i] == expected[i], + "Error in decoding the " + << id << "-th string: expected " + << expected[i] << "," + << " but got " << decoded[i] + << " at position " << int(i)); + } + + id_type got_id = + dict.locate({decoded.data(), decoded.data() + size}); + REQUIRE(got_id != global::invalid_term_id); + REQUIRE_MESSAGE(got_id == id, "Error in locating the " + << id + << "-th string: expected id " + << id << "," + << " but got id " << got_id); + } + + input.close(); + } + std::remove(output_filename); + } +} diff --git a/archive/test/test_inverted_index.cpp b/archive/test/test_inverted_index.cpp new file mode 100644 index 0000000..5faa823 --- /dev/null +++ b/archive/test/test_inverted_index.cpp @@ -0,0 +1,135 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +typedef ef_inverted_index inverted_index_type; + +TEST_CASE("test inverted_index::iterator") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + inverted_index_type::builder builder(params); + inverted_index_type index; + builder.build(index); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + essentials::save(index, output_filename); + } + + { + inverted_index_type index; + essentials::load(index, output_filename); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + + std::ifstream input((params.collection_basename + ".inverted").c_str(), + std::ios_base::in); + for (uint64_t i 
= 0; i != index.num_terms(); ++i) { + auto it = index.iterator(i); + uint32_t n = 0; + input >> n; + REQUIRE_MESSAGE(n == it.size(), "list has size " << it.size() + << " instead of " + << n); + for (uint64_t k = 0; k != n; ++k, ++it) { + id_type expected; + input >> expected; + auto got = *it; + REQUIRE_MESSAGE(got == expected, + "got " << got << " but expected " << expected); + } + } + input.close(); + + std::remove(output_filename); + } +}; + +TEST_CASE("test inverted_index::intersection_iterator") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + { + inverted_index_type::builder builder(params); + inverted_index_type index; + builder.build(index); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + essentials::save(index, output_filename); + } + + { + inverted_index_type index; + essentials::load(index, output_filename); + REQUIRE(index.num_docs() == params.universe); + REQUIRE(index.num_terms() == params.num_terms); + + static const uint32_t num_queries = 1000000; + static const uint32_t max_num_terms = 5; + auto queries = testing::gen_random_queries(num_queries, max_num_terms, + index.num_terms()); + + std::vector first(index.num_docs()); + std::vector second(index.num_docs()); + std::vector intersection(index.num_docs()); + + for (auto const& q : queries) { + uint32_t first_size = 0; + uint32_t second_size = 0; + assert(q.size() >= 2); + + { + auto it = index.iterator(q[0] - 1); + first_size = it.size(); + for (uint32_t i = 0; i != first_size; ++i) { + first[i] = it.access(i); + } + } + + { + auto it = index.iterator(q[1] - 1); + second_size = it.size(); + for (uint32_t i = 0; i != second_size; ++i) { + second[i] = it.access(i); + } + } + + auto end = std::set_intersection( + first.begin(), first.begin() + first_size, second.begin(), + second.begin() + second_size, intersection.begin()); + first_size = std::distance(intersection.begin(), end); + first.swap(intersection); + + for (uint32_t i = 2; i != q.size(); ++i) { + auto it = index.iterator(q[i] - 1); + second_size = it.size(); + for (uint32_t i = 0; i != second_size; ++i) { + second[i] = it.access(i); + } + end = std::set_intersection( + first.begin(), first.begin() + first_size, second.begin(), + second.begin() + second_size, intersection.begin()); + first_size = std::distance(intersection.begin(), end); + first.swap(intersection); + } + + auto it = index.intersection_iterator(q); + uint32_t n = 0; + for (; it.has_next(); ++n, ++it) { + auto doc_id = *it; + REQUIRE_MESSAGE( + doc_id == first[n], + "expected doc_id " << first[n] << " but got " << doc_id); + } + REQUIRE_MESSAGE(n == first_size, "expected " << first_size + << " results, but got " + << n); + } + std::remove(output_filename); + } +} diff --git a/archive/test/test_locate_prefix.cpp b/archive/test/test_locate_prefix.cpp new file mode 100644 index 0000000..1a81693 --- /dev/null +++ b/archive/test/test_locate_prefix.cpp @@ -0,0 +1,102 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +typedef ef_completion_trie completion_trie_type; + +template +void test_locate_prefix(Dictionary const& dict, Index const& index, + std::vector const& queries, + std::vector const& strings) { + for (auto const& query : queries) { + range expected = testing::locate_prefix(strings, query); + completion_type prefix; + byte_range suffix; + parse(dict, query, prefix, suffix, true); + + range suffix_lex_range = 
dict.locate_prefix(suffix); + suffix_lex_range.begin += 1; + suffix_lex_range.end += 1; + range got = index.locate_prefix(prefix, suffix_lex_range); + + CHECK_MESSAGE((got.begin == expected.begin and got.end == expected.end), + "Error for query '" + << query << "': expected [" << expected.begin << "," + << expected.end << ") but got [" << got.begin << "," + << got.end << ")"); + } +} + +TEST_CASE("test locate_prefix()") { + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + fc_dictionary_type dict; + { + fc_dictionary_type::builder builder(params); + builder.build(dict); + } + + std::vector strings; + + { + essentials::logger("loading all strings..."); + std::string line; + strings.reserve(params.num_completions); + std::ifstream input((params.collection_basename).c_str(), + std::ios_base::in); + for (uint32_t i = 0; i != params.num_completions; ++i) { + if (!std::getline(input, line)) break; + auto s = line.substr(line.find(' ') + 1, line.size()); + strings.push_back(s); + } + input.close(); + essentials::logger("loaded " + std::to_string(strings.size()) + + " strings"); + } + + constexpr uint32_t max_num_queries = 5000; + std::vector queries; + static std::vector percentages = {0.0, 0.25, 0.50, 0.75, 1.0}; + static std::vector query_terms = {1, 2, 3, 4, 5, 6, 7}; + + completion_trie_type ct_index; + integer_fc_dictionary_type fc_index; + + { + completion_trie_type::builder builder(params); + builder.build(ct_index); + REQUIRE(ct_index.size() == params.num_completions); + } + + { + integer_fc_dictionary_type::builder builder(params); + builder.build(fc_index); + REQUIRE(fc_index.size() == params.num_completions); + } + + for (auto perc : percentages) { + for (auto num_terms : query_terms) { + std::cout << "percentage " << perc * 100.0 << "%, num_terms " + << num_terms << std::endl; + { + queries.clear(); + std::string filename = + params.collection_basename + + ".queries/queries.length=" + std::to_string(num_terms); + std::ifstream querylog(filename.c_str()); + if (!querylog.is_open()) { + std::cerr << "cannot open file '" << filename << "'" + << std::endl; + return; + } + load_queries(queries, max_num_queries, perc, querylog); + querylog.close(); + } + + test_locate_prefix(dict, ct_index, queries, strings); + test_locate_prefix(dict, fc_index, queries, strings); + } + } +} diff --git a/archive/test/test_unsorted_list.cpp b/archive/test/test_unsorted_list.cpp new file mode 100644 index 0000000..2760532 --- /dev/null +++ b/archive/test/test_unsorted_list.cpp @@ -0,0 +1,172 @@ +#include "test_common.hpp" + +using namespace autocomplete; + +uint32_t naive_topk(std::vector const& input, range r, uint32_t k, + std::vector& topk, bool unique = false) { + uint32_t range_len = r.end - r.begin; + for (uint32_t i = 0; i != range_len; ++i) { + topk[i] = input[r.begin + i]; + } + std::sort(topk.begin(), topk.begin() + range_len); + uint32_t results = 0; + if (unique) { + auto end = std::unique(topk.begin(), topk.begin() + range_len); + results = std::min(k, std::distance(topk.begin(), end)); + } else { + results = std::min(k, range_len); + } + return results; +} + +std::vector gen_random_queries(uint32_t num_queries, + uint32_t max_range_len) { + std::vector queries; + queries.reserve(num_queries); + essentials::uniform_int_rng random(0, max_range_len); + for (uint32_t i = 0; i != num_queries; ++i) { + uint32_t x = random.gen(); + uint32_t y = random.gen(); + range r; + if (y > x) { + r = {x, y}; + } else { + r = {y, x}; + } + queries.push_back(r); 
+ } + return queries; +} + +TEST_CASE("test unsorted_list on doc_ids") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + static const uint32_t k = 10; + static_assert(k <= constants::MAX_K, "k must be less than max allowed"); + static const uint32_t num_queries = 5000; + + std::vector doc_ids; + + { + doc_ids.reserve(params.num_completions); + std::ifstream input(params.collection_basename + ".mapped", + std::ios_base::in); + completion_iterator it(params, input); + while (input) { + auto const& record = *it; + doc_ids.push_back(record.doc_id); + ++it; + } + input.close(); + + // { + // // must have all ids from 0 to doc_ids.size() - 1 + // // NOTE: not true if we filter out some strings to be used as + // // queries + // std::vector tmp = doc_ids; + // std::sort(tmp.begin(), tmp.end()); + // for (id_type id = 0; id != doc_ids.size(); ++id) { + // REQUIRE_MESSAGE(tmp[id] == id, + // "Error: id " << id << " not found"); + // } + // } + + unsorted_list_type list; + list.build(doc_ids); + REQUIRE(list.size() == doc_ids.size()); + essentials::save(list, output_filename); + } + + { + unsorted_list_type list; + essentials::load(list, output_filename); + + std::vector topk(constants::MAX_K); + auto queries = gen_random_queries(num_queries, doc_ids.size()); + std::vector expected(params.num_completions); + + for (auto q : queries) { + uint32_t expected_results = naive_topk(doc_ids, q, k, expected); + uint32_t results = list.topk(q, k, topk); + REQUIRE_MESSAGE(expected_results == results, + "Error: expected " << expected_results + << " topk elements but got " + << results); + for (uint32_t i = 0; i != results; ++i) { + REQUIRE_MESSAGE(topk[i] == expected[i], + "Error: expected " << expected[i] << " but got " + << topk[i]); + } + } + + std::remove(output_filename); + } +} + +TEST_CASE("test unsorted_list on minimal doc_ids") { + char const* output_filename = testing::tmp_filename.c_str(); + parameters params; + params.collection_basename = testing::test_filename.c_str(); + params.load(); + + static const uint32_t k = 10; + static_assert(k <= constants::MAX_K, "k must be less than max allowed"); + static const uint32_t num_queries = 5000; + + std::vector doc_ids; + + { + doc_ids.reserve(params.num_terms); + std::ifstream input((params.collection_basename + ".inverted").c_str(), + std::ios_base::in); + id_type first; + for (uint64_t i = 0; i != params.num_terms; ++i) { + uint32_t n = 0; + input >> n; + input >> first; + doc_ids.push_back(first); + for (uint64_t k = 1; k != n; ++k) { + id_type x; + input >> x; + (void)x; // discard + } + } + input.close(); + REQUIRE(doc_ids.size() == params.num_terms); + + unsorted_list_type list; + list.build(doc_ids); + REQUIRE(list.size() == doc_ids.size()); + essentials::save(list, output_filename); + } + + { + unsorted_list_type list; + essentials::load(list, output_filename); + + std::vector topk(constants::MAX_K); + auto queries = gen_random_queries(num_queries, doc_ids.size()); + constexpr bool unique = true; + std::vector expected(params.num_terms); + + for (auto q : queries) { + uint32_t expected_results = + naive_topk(doc_ids, q, k, expected, unique); + uint32_t results = list.topk(q, k, topk, unique); + REQUIRE_MESSAGE(expected_results == results, + "Error: expected " << expected_results + << " topk elements but got " + << results); + for (uint32_t i = 0; i != results; ++i) { + REQUIRE_MESSAGE(topk[i] == expected[i], + "Error: expected " 
<< expected[i] << " but got " + << topk[i]); + } + } + + std::remove(output_filename); + } +} \ No newline at end of file diff --git a/test_data/build_inverted_and_forward.py b/archive/test_data/build_inverted_and_forward.py similarity index 89% rename from test_data/build_inverted_and_forward.py rename to archive/test_data/build_inverted_and_forward.py index c627699..0966d99 100644 --- a/test_data/build_inverted_and_forward.py +++ b/archive/test_data/build_inverted_and_forward.py @@ -1,5 +1,4 @@ import sys -import numpy as np input_filename = sys.argv[1] @@ -20,9 +19,11 @@ num_docs = 0 with open(input_filename + ".mapped.stats") as f: num_terms = int(f.readline()) - print num_terms + print("terms: " + str(num_terms)) + f.readline() # skip line: max num. of query terms + f.readline() # skip line: num. of completions num_docs = int(f.readline()) - print num_docs + print("universe: " + str(num_docs)) inverted_index = [[] for i in range(num_terms + 1)] # id 0 is not assigned forward_index = [[] for i in range(num_docs)] @@ -35,7 +36,7 @@ discard = False for i in range(1, len(x)): try: - term = x[i].encode('utf-8') + term = x[i] try: term_id = tokens[term] if term_id not in mapped: @@ -51,7 +52,7 @@ if not discard: # NOTE: not sorted! if doc_id >= num_docs: - print doc_id,num_docs + print(doc_id,num_docs) forward_index[doc_id] = mapped; lines += 1 diff --git a/test_data/build_stats.py b/archive/test_data/build_stats.py similarity index 76% rename from test_data/build_stats.py rename to archive/test_data/build_stats.py index f9923f0..880bcd3 100644 --- a/test_data/build_stats.py +++ b/archive/test_data/build_stats.py @@ -8,10 +8,17 @@ output_file = open(input_filename + ".stats", 'a') prev = [] +universe = 0; with open(input_filename, 'r') as f: for line in f: x = line.rstrip('\n').split() + docid = int(x[0]) + + if docid > universe: + universe = docid + q = x[1:len(x)] + level_id = 0 while level_id < len(q) and level_id < len(prev) and q[level_id] == prev[level_id]: level_id += 1 @@ -31,8 +38,12 @@ # number of completions # number of levels in the trie # number of nodes for each level +print("universe: " + str(universe + 1)) +print("completions: " + str(lines)) output_file.write(str(lines) + "\n") +output_file.write(str(universe + 1) + "\n") output_file.write(str(len(nodes_per_level)) + "\n") -for key, value in sorted(nodes_per_level.iteritems(), key = lambda kv: kv[0]): +for key, value in sorted(nodes_per_level.items(), key = lambda kv: kv[0]): output_file.write(str(value) + "\n") -output_file.close() \ No newline at end of file +output_file.close() + diff --git a/test_data/extract_dict.py b/archive/test_data/extract_dict.py similarity index 78% rename from test_data/extract_dict.py rename to archive/test_data/extract_dict.py index 875f85b..e9b48d0 100644 --- a/test_data/extract_dict.py +++ b/archive/test_data/extract_dict.py @@ -1,9 +1,8 @@ import sys -from sets import Set input_filename = sys.argv[1] -tokens = Set({}) +tokens = set() lines = 0 print("parsing input file...") @@ -14,12 +13,12 @@ tokens.add(x[i]) lines += 1 if lines % 1000000 == 0: - print "processed " + str(lines) + " lines" + print("processed " + str(lines) + " lines") print("processed " + str(lines) + " lines") print("dictionary has " + str(len(tokens)) + " keys") dict_file = open(input_filename + ".dict", 'w') for key in sorted(tokens): - dict_file.write(key.encode('utf-8') + "\n") + dict_file.write(key + "\n") dict_file.close() \ No newline at end of file diff --git a/archive/test_data/filter_and_preprocess.sh 
b/archive/test_data/filter_and_preprocess.sh new file mode 100644 index 0000000..9a5d787 --- /dev/null +++ b/archive/test_data/filter_and_preprocess.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +echo $1 # input filename + +# number of completions to exclude per completion size, +# e.g., if it is 100, then at most 7 x 100 completions are filtered out +echo $2 + +python partition_queries_by_length.py $1 $1.filtered.queries $2 +python filter_dataset.py $1 $1.filtered.queries +python extract_dict.py $1.filtered +python map_dataset.py $1.filtered +python build_stats.py $1.filtered.mapped +python build_inverted_and_forward.py $1.filtered diff --git a/archive/test_data/filter_dataset.py b/archive/test_data/filter_dataset.py new file mode 100644 index 0000000..dc68a28 --- /dev/null +++ b/archive/test_data/filter_dataset.py @@ -0,0 +1,32 @@ +import sys +from sets import Set + +input_filename = sys.argv[1] +queries_directory = sys.argv[2] + +to_filter = Set({}) +print("loading strings to filter...") +for i in range(1,7): + with open(queries_directory + "/queries.length=" + str(i)) as f: + for line in f: + s = line.rstrip('\n') + to_filter.add(s) +with open(queries_directory + "/queries.length=7+") as f: + for line in f: + s = line.rstrip('\n') + to_filter.add(s) + +lines = 0 +print("filtering dataset...") + +output_file = open(input_filename + ".filtered", 'w') +with open(input_filename, 'r') as f: + for line in f: + x = line.rstrip('\n').split() + string = ' '.join(x[1:len(x)]) + if string not in to_filter: + output_file.write(line) + lines += 1 + if lines % 1000000 == 0: + print("processed " + str(lines) + " lines") +output_file.close() \ No newline at end of file diff --git a/test_data/map_dataset.py b/archive/test_data/map_dataset.py similarity index 95% rename from test_data/map_dataset.py rename to archive/test_data/map_dataset.py index 86e6357..beb7155 100644 --- a/test_data/map_dataset.py +++ b/archive/test_data/map_dataset.py @@ -24,7 +24,7 @@ string_len = 0; mapped = [x[0]] for i in range(1, len(x)): # x[0] stores the docID - t = x[i].encode('utf-8') + t = x[i] try: id = tokens[t] mapped.append(id) @@ -48,4 +48,4 @@ stats_file.write(str(len(tokens)) + "\n") stats_file.write(str(max_string_len) + "\n") -stats_file.close() \ No newline at end of file +stats_file.close() diff --git a/archive/test_data/partition_queries_by_length.py b/archive/test_data/partition_queries_by_length.py new file mode 100644 index 0000000..3d3823b --- /dev/null +++ b/archive/test_data/partition_queries_by_length.py @@ -0,0 +1,40 @@ +import sys, os, random + +input_filename = sys.argv[1] +output_directory = sys.argv[2] +n = int(sys.argv[3]) + +if not os.path.exists(output_directory): + os.makedirs(output_directory) + +num_shards = 6 +files = [open(output_directory + "/queries.length=" + str(i), "w") for i in range(1,num_shards + 1)] +all_others = open(output_directory + "/queries.length=" + str(num_shards + 1) + "+", "w") + +strings = [[] for i in range(num_shards)] +all_others_strings = [] + +lines = 0 +with open(input_filename, 'r') as f: + for line in f: + x = line.rstrip('\n').split() + l = len(x) - 1 + string = ' '.join(x[1:l+1]) + '\n' + if l > num_shards: + all_others_strings.append(string) + else: + strings[l - 1].append(string) + lines += 1 + if lines % 1000000 == 0: + print("processed " + str(lines) + " lines") + +for i in range(num_shards): + random.shuffle(strings[i]) + for k in range(min(n, len(strings[i]))): + files[i].write(strings[i][k]) + files[i].close() + +random.shuffle(all_others_strings) +for k in 
range(min(n, len(all_others_strings))): + all_others.write(all_others_strings[k]) +all_others.close() diff --git a/archive/test_data/preprocess.sh b/archive/test_data/preprocess.sh new file mode 100755 index 0000000..b795bfe --- /dev/null +++ b/archive/test_data/preprocess.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +echo $1 # input filename +echo $2 # number of queries for each size +python3 extract_dict.py $1 +python3 map_dataset.py $1 +python3 build_stats.py $1.mapped +python3 build_inverted_and_forward.py $1 +python3 partition_queries_by_length.py $1 $1.queries $2 diff --git a/test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions b/archive/test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions similarity index 100% rename from test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions rename to archive/test_data/trec_05_efficiency_queries/trec_05_efficiency_queries.completions diff --git a/web/index.html b/archive/web/index.html similarity index 100% rename from web/index.html rename to archive/web/index.html diff --git a/web/jquery-1.8.2.min.js b/archive/web/jquery-1.8.2.min.js similarity index 100% rename from web/jquery-1.8.2.min.js rename to archive/web/jquery-1.8.2.min.js diff --git a/web/jquery.autocomplete.js b/archive/web/jquery.autocomplete.js similarity index 100% rename from web/jquery.autocomplete.js rename to archive/web/jquery.autocomplete.js diff --git a/web/styles.css b/archive/web/styles.css similarity index 93% rename from web/styles.css rename to archive/web/styles.css index 5db5234..b540533 100644 --- a/web/styles.css +++ b/archive/web/styles.css @@ -9,4 +9,4 @@ .autocomplete-group { padding: 2px 5px; } .autocomplete-group strong { font-weight: bold; font-size: 16px; color: #000; display: block; border-bottom: 1px solid #000; } -input { font-size: 28px; padding: 10px; border: 1px solid #CCC; display: block; margin: 20px 0; } +input { font-size: 18px; padding: 10px; border: 1px solid #CCC; display: block; margin: 20px 0; } diff --git a/web/topkcomp.js b/archive/web/topkcomp.js similarity index 100% rename from web/topkcomp.js rename to archive/web/topkcomp.js diff --git a/autocomplete-rs/.gitignore b/autocomplete-rs/.gitignore new file mode 100644 index 0000000..da95885 --- /dev/null +++ b/autocomplete-rs/.gitignore @@ -0,0 +1,18 @@ +# Cargo +target/ + +# IDEs +.vscode/ +.idea/ + +# OS +.DS_Store + +# Rust + +# Build +build.rs + +# Cargo.lock +Cargo.lock + diff --git a/autocomplete-rs/Cargo.lock b/autocomplete-rs/Cargo.lock new file mode 100644 index 0000000..6222344 --- /dev/null +++ b/autocomplete-rs/Cargo.lock @@ -0,0 +1,857 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.59.0", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "autocomplete-rs" +version = "0.1.0" +dependencies = [ + "clap", + "futures", + "tempfile", + "tokio", + "tonic-build", +] + +[[package]] +name = "backtrace" +version = "0.3.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.38" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "hashbrown" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", +] + +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "prettyplease" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + +[[package]] +name = "redox_syscall" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook-registry" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" + +[[package]] +name = "socket2" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "tokio" +version = "1.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2513ca694ef9ede0fb23fe71a4ee4107cb102b9dc1930f6d0fd77aae068ae165" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.52.0", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tonic-build" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d021fc044c18582b9a2408cd0dd05b1596e3ecdb5c4df822bb0183545683889" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/autocomplete-rs/Cargo.toml b/autocomplete-rs/Cargo.toml new file mode 100644 index 0000000..68ed87f --- /dev/null +++ b/autocomplete-rs/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "autocomplete-rs" +version = "0.1.0" +edition = "2021" + +[dependencies] +tokio = { version = "1.0", features = ["full"] } +futures = "0.3" +clap = { version = "4.4", features = ["derive"] } + +[dev-dependencies] +tempfile = "3.8" + +[build-dependencies] +tonic-build = "0.10" diff --git a/autocomplete-rs/LICENSE b/autocomplete-rs/LICENSE new file mode 100644 index 0000000..d874d0b --- /dev/null +++ b/autocomplete-rs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Autocomplete Service Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/autocomplete-rs/README.md b/autocomplete-rs/README.md new file mode 100644 index 0000000..4c11811 --- /dev/null +++ b/autocomplete-rs/README.md @@ -0,0 +1,118 @@ +# Autocomplete Service + +A high-performance autocomplete service written in Rust, supporting both gRPC and GraphQL interfaces. + +## Features + +- **Dual API Support** + - gRPC interface for high-performance RPC calls + - GraphQL interface for flexible querying + - Shared backend implementation for both APIs + +- **Core Features** + - Fast prefix-based autocomplete + - Score-based ranking of suggestions + - Memory-efficient string storage + - Concurrent request handling + +- **API Endpoints** + - gRPC: `[::1]:50051` (configurable) + - GraphQL: `[::1]:8000/graphql` (configurable) + - GraphQL Playground: `[::1]:8000/playground` + +## Project Status + +### Completed +- ✅ Basic autocomplete implementation +- ✅ gRPC server implementation +- ✅ GraphQL server implementation +- ✅ Command-line configuration +- ✅ Shared backend between APIs + +### In Progress +- 🔄 Documentation +- 🔄 Testing suite +- 🔄 Performance benchmarks + +### Planned +- ⏳ Authentication +- ⏳ Rate limiting +- ⏳ Metrics and monitoring +- ⏳ Docker support +- ⏳ Client examples in multiple languages + +## Getting Started + +### Prerequisites +- Rust 1.70 or later +- Cargo + +### Building +```bash +cargo build --release +``` + +### Running +```bash +# Default configuration +cargo run + +# Custom addresses +cargo run -- --grpc-addr 127.0.0.1:50051 --graphql-addr 127.0.0.1:8000 + +# Show help +cargo run -- --help +``` + +## API Usage + +### gRPC +```protobuf +service AutocompleteService { + rpc Complete(CompleteRequest) returns (CompleteResponse); + rpc Init(InitRequest) returns (InitResponse); + rpc GetStats(StatsRequest) returns (StatsResponse); +} +``` + +### GraphQL +```graphql +type Query { + complete(prefix: String!, maxResults: Int): CompleteResponse! + stats: StatsResponse! +} + +type Mutation { + init(strings: [StringInput!]!): InitResponse! +} +``` + +## Project Structure + +``` +autocomplete-rs/ +├── src/ +│ ├── main.rs # Entry point and CLI +│ ├── autocomplete.rs # Core autocomplete logic +│ ├── graphql.rs # GraphQL schema and resolvers +│ ├── server.rs # Server implementations +│ ├── string_pool.rs # String interning +│ ├── trie.rs # Trie data structure +│ └── types.rs # Common types +├── proto/ +│ └── autocomplete.proto # gRPC service definition +└── schema/ + └── schema.graphql # GraphQL schema +``` + +## Contributing + +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -m 'Add amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. Open a Pull Request + +## License + +This project is licensed under the MIT License - see the LICENSE file for details. 
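+
+## Example client
+
+A quick way to exercise the gRPC API is the bundled example client in `examples/client.rs`, assuming a server is listening on the default `[::1]:50051` address:
+
+```bash
+# terminal 1: start the server
+cargo run
+
+# terminal 2: run the example client against it
+cargo run --example client
+```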
\ No newline at end of file
diff --git a/autocomplete-rs/build.rs b/autocomplete-rs/build.rs
new file mode 100644
index 0000000..ed0ba48
--- /dev/null
+++ b/autocomplete-rs/build.rs
@@ -0,0 +1,3 @@
+fn main() {
+    // No build-time code generation needed
+}
\ No newline at end of file
diff --git a/autocomplete-rs/examples/client.rs b/autocomplete-rs/examples/client.rs
new file mode 100644
index 0000000..cbdb2c9
--- /dev/null
+++ b/autocomplete-rs/examples/client.rs
@@ -0,0 +1,36 @@
+use autocomplete_proto::{
+    autocomplete_service_client::AutocompleteServiceClient,
+    CompleteRequest, InitRequest, StringScore,
+};
+
+pub mod autocomplete_proto {
+    tonic::include_proto!("autocomplete");
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut client = AutocompleteServiceClient::connect("http://[::1]:50051").await?;
+
+    // Initialize with some test data
+    let init_request = InitRequest {
+        strings: vec![
+            StringScore { text: "hello".to_string(), score: 1.0 },
+            StringScore { text: "help".to_string(), score: 0.8 },
+            StringScore { text: "hell".to_string(), score: 0.6 },
+        ],
+    };
+
+    let response = client.init(init_request).await?;
+    println!("INIT RESPONSE: {:?}", response);
+
+    // Get completions
+    let request = CompleteRequest {
+        prefix: "hel".to_string(),
+        max_results: 10,
+    };
+
+    let response = client.complete(request).await?;
+    println!("COMPLETE RESPONSE: {:?}", response);
+
+    Ok(())
+}
\ No newline at end of file
diff --git a/autocomplete-rs/schema/schema.graphql b/autocomplete-rs/schema/schema.graphql
new file mode 100644
index 0000000..70da230
--- /dev/null
+++ b/autocomplete-rs/schema/schema.graphql
@@ -0,0 +1,41 @@
+type Query {
+  # Get completions for a prefix
+  complete(prefix: String!, maxResults: Int): CompleteResponse!
+
+  # Get system statistics
+  stats: Stats!
+}
+
+type Mutation {
+  # Initialize the autocomplete system with strings and scores
+  init(strings: [StringScoreInput!]!): InitResponse!
+}
+
+# Input type for string with score
+input StringScoreInput {
+  text: String!
+  score: Float!
+}
+
+# Response type for completions
+type CompleteResponse {
+  completions: [Completion!]!
+}
+
+# A single completion result
+type Completion {
+  text: String!
+  score: Float!
+}
+
+# Response type for initialization
+type InitResponse {
+  success: Boolean!
+  error: String
+}
+
+# System statistics
+type Stats {
+  numTerms: Int!
+  memoryBytes: Int!
+}
\ No newline at end of file
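For reference, a query shaped by this schema might look as follows (illustrative values only; per the README, the GraphQL endpoint defaults to `[::1]:8000/graphql`):

```graphql
query {
  complete(prefix: "hel", maxResults: 5) {
    completions {
      text
      score
    }
  }
}
```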
diff --git a/autocomplete-rs/src/autocomplete.rs b/autocomplete-rs/src/autocomplete.rs
new file mode 100644
index 0000000..bb9ffa6
--- /dev/null
+++ b/autocomplete-rs/src/autocomplete.rs
@@ -0,0 +1,45 @@
+use crate::types::ScoreType;
+use crate::trie::Trie;
+use super::dictionary::Dictionary;
+
+#[derive(Clone)]
+pub struct Autocomplete {
+    trie: Trie,
+    dictionary: Dictionary,
+}
+
+impl Autocomplete {
+    pub fn new() -> Self {
+        Self {
+            trie: Trie::new(),
+            dictionary: Dictionary::new(),
+        }
+    }
+
+    pub fn init(&mut self, strings: &[(String, ScoreType)]) -> Result<(), String> {
+        for (string, score) in strings {
+            let id = self.dictionary.insert(string.clone());
+            self.trie.insert(string, id, *score);
+        }
+        Ok(())
+    }
+
+    pub fn complete(&self, prefix: &str) -> Vec<(String, ScoreType)> {
+        let completions = self.trie.complete(prefix);
+        completions
+            .into_iter()
+            .filter_map(|(id, score)| {
+                self.dictionary.get(id).map(|text| (text.to_string(), score))
+            })
+            .collect()
+    }
+
+    pub fn num_terms(&self) -> usize {
+        self.dictionary.len()
+    }
+
+    pub fn bytes(&self) -> usize {
+        // TODO: Implement actual memory usage calculation
+        0
+    }
+}
\ No newline at end of file
diff --git a/autocomplete-rs/src/constants.rs b/autocomplete-rs/src/constants.rs
new file mode 100644
index 0000000..b949eb7
--- /dev/null
+++ b/autocomplete-rs/src/constants.rs
@@ -0,0 +1,8 @@
+// Constants for the autocomplete system
+pub const MAX_K: u32 = 15;
+pub const MAX_NUM_TERMS_PER_QUERY: u32 = 64;
+pub const MAX_NUM_CHARS_PER_QUERY: u32 = 128;
+pub const POOL_SIZE: usize = (MAX_K as usize) * (MAX_NUM_CHARS_PER_QUERY as usize);
+
+// Compile-time assertion
+const _: () = assert!(MAX_NUM_TERMS_PER_QUERY < 256, "MAX_NUM_TERMS_PER_QUERY must be < 256");
\ No newline at end of file
diff --git a/autocomplete-rs/src/dictionary.rs b/autocomplete-rs/src/dictionary.rs
new file mode 100644
index 0000000..a09adda
--- /dev/null
+++ b/autocomplete-rs/src/dictionary.rs
@@ -0,0 +1,46 @@
+use crate::types::IdType;
+
+#[derive(Clone)]
+pub struct Dictionary {
+    strings: Vec<String>,
+    id_map: std::collections::HashMap<String, IdType>,
+    next_id: IdType,
+}
+
+impl Dictionary {
+    pub fn new() -> Self {
+        Self {
+            strings: Vec::new(),
+            id_map: std::collections::HashMap::new(),
+            next_id: 0,
+        }
+    }
+
+    pub fn insert(&mut self, string: String) -> IdType {
+        if let Some(&id) = self.id_map.get(&string) {
+            return id;
+        }
+
+        let id = self.next_id;
+        self.next_id += 1;
+        self.strings.push(string.clone());
+        self.id_map.insert(string, id);
+        id
+    }
+
+    pub fn get(&self, id: IdType) -> Option<&str> {
+        self.strings.get(id as usize).map(|s| s.as_str())
+    }
+
+    pub fn get_id(&self, string: &str) -> Option<IdType> {
+        self.id_map.get(string).copied()
+    }
+
+    pub fn len(&self) -> usize {
+        self.strings.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.strings.is_empty()
+    }
+}
\ No newline at end of file
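A quick usage sketch (illustrative snippet, not part of the diff): the dictionary interns strings, handing out sequential IDs and returning the existing ID on duplicate inserts.

```rust
use autocomplete_rs::dictionary::Dictionary;

fn main() {
    let mut dict = Dictionary::new();
    let a = dict.insert("hello".to_string());
    let b = dict.insert("hello".to_string()); // duplicate: same id handed back
    assert_eq!(a, b);
    assert_eq!(dict.get(a), Some("hello"));
    assert_eq!(dict.get_id("world"), None);
    assert_eq!(dict.len(), 1);
}
```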
diff --git a/autocomplete-rs/src/index.rs b/autocomplete-rs/src/index.rs
new file mode 100644
index 0000000..29da316
--- /dev/null
+++ b/autocomplete-rs/src/index.rs
@@ -0,0 +1,182 @@
+use crate::types::{IdType, ScoreType};
+use crate::trie::Trie;
+use crate::dictionary::Dictionary;
+
+#[derive(Clone)]
+pub struct Index {
+    trie: Trie,
+    dictionary: Dictionary,
+}
+
+impl Index {
+    pub fn new() -> Self {
+        Self {
+            trie: Trie::new(),
+            dictionary: Dictionary::new(),
+        }
+    }
+
+    pub fn add_doc(&mut self, _doc_id: IdType, text: &str, score: ScoreType) {
+        let id = self.dictionary.insert(text.to_string());
+        self.trie.insert(text, id, score);
+    }
+
+    pub fn search(&self, prefix: &str) -> Vec<(IdType, ScoreType)> {
+        let completions = self.trie.complete(prefix);
+        completions
+            .into_iter()
+            .filter_map(|(id, score)| {
+                self.dictionary.get(id).map(|_| (id, score))
+            })
+            .collect()
+    }
+
+    pub fn num_terms(&self) -> usize {
+        self.dictionary.len()
+    }
+
+    pub fn bytes(&self) -> usize {
+        // TODO: Implement actual memory usage calculation
+        0
+    }
+}
+
+/// Blocked inverted index for efficient document retrieval
+pub struct BlockedInvertedIndex {
+    blocks: Vec<Vec<IdType>>,
+    block_size: usize,
+}
+
+impl BlockedInvertedIndex {
+    /// Create a new blocked inverted index
+    pub fn new(block_size: usize) -> Self {
+        Self {
+            blocks: Vec::new(),
+            block_size,
+        }
+    }
+
+    /// Add a document to the index
+    pub fn insert(&mut self, id: IdType) {
+        if self.blocks.is_empty() || self.blocks.last().unwrap().len() >= self.block_size {
+            self.blocks.push(Vec::with_capacity(self.block_size));
+        }
+        self.blocks.last_mut().unwrap().push(id);
+    }
+
+    /// Get documents for a term
+    pub fn get(&self, block_id: usize) -> Option<&[IdType]> {
+        self.blocks.get(block_id).map(|v| v.as_slice())
+    }
+
+    /// Get the number of blocks
+    pub fn num_blocks(&self) -> usize {
+        self.blocks.len()
+    }
+
+    /// Get the block size
+    pub fn block_size(&self) -> usize {
+        self.block_size
+    }
+}
+
+/// Compact vector for efficient storage
+pub struct CompactVector {
+    data: Vec<u8>,
+    element_size: usize,
+    num_elements: usize,
+}
+
+impl CompactVector {
+    /// Create a new compact vector
+    pub fn new(element_size: usize) -> Self {
+        Self {
+            data: Vec::new(),
+            element_size,
+            num_elements: 0,
+        }
+    }
+
+    /// Add an element to the vector
+    pub fn push(&mut self, element: &[u8]) {
+        assert_eq!(element.len(), self.element_size);
+        self.data.extend_from_slice(element);
+        self.num_elements += 1;
+    }
+
+    /// Get an element from the vector
+    pub fn get(&self, index: usize) -> Option<&[u8]> {
+        if index >= self.num_elements {
+            return None;
+        }
+        let start = index * self.element_size;
+        let end = start + self.element_size;
+        Some(&self.data[start..end])
+    }
+
+    /// Get the number of elements
+    pub fn size(&self) -> usize {
+        self.num_elements
+    }
+
+    /// Get the size in bytes
+    pub fn bytes(&self) -> usize {
+        self.data.len()
+    }
+}
+
+/// Bit vector for efficient bit-level operations
+pub struct BitVector {
+    data: Vec<u8>,
+    num_bits: usize,
+}
+
+impl BitVector {
+    /// Create a new bit vector
+    pub fn new(num_bits: usize) -> Self {
+        let num_bytes = (num_bits + 7) / 8;
+        Self {
+            data: vec![0; num_bytes],
+            num_bits,
+        }
+    }
+
+    /// Set a bit
+    pub fn set(&mut self, index: usize) {
+        if index < self.num_bits {
+            let byte_idx = index / 8;
+            let bit_idx = index % 8;
+            self.data[byte_idx] |= 1 << bit_idx;
+        }
+    }
+
+    /// Clear a bit
+    pub fn clear(&mut self, index: usize) {
+        if index < self.num_bits {
+            let byte_idx = index / 8;
+            let bit_idx = index % 8;
+            self.data[byte_idx] &= !(1 << bit_idx);
+        }
+    }
+
+    /// Test a bit
+    pub fn test(&self, index: usize) -> bool {
+        if index < self.num_bits {
+            let byte_idx = index / 8;
+            let bit_idx = index % 8;
+            (self.data[byte_idx] & (1 << bit_idx)) != 0
+        } else {
+            false
+        }
+    }
+
+    /// Get the number of bits
+    pub fn size(&self) -> usize {
+        self.num_bits
+    }
+
+    /// Get the size in bytes
+    pub fn bytes(&self) -> usize {
+        self.data.len()
+    }
+}
\ No newline at end of file
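A small illustrative check of the `BitVector` semantics above (hypothetical usage, in the spirit of the tests later in this diff):

```rust
use autocomplete_rs::index::BitVector;

fn main() {
    let mut bits = BitVector::new(10); // 10 bits fit in (10 + 7) / 8 = 2 bytes
    bits.set(3);
    assert!(bits.test(3));
    bits.clear(3);
    assert!(!bits.test(3));
    assert!(!bits.test(99)); // out-of-range reads answer false instead of panicking
    assert_eq!(bits.bytes(), 2);
}
```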
diff --git a/autocomplete-rs/src/lib.rs b/autocomplete-rs/src/lib.rs
new file mode 100644
index 0000000..70048c6
--- /dev/null
+++ b/autocomplete-rs/src/lib.rs
@@ -0,0 +1,19 @@
+pub mod dictionary;
+pub mod types;
+pub mod trie;
+pub mod constants;
+pub mod parameters;
+pub mod probe;
+pub mod string_pool;
+pub mod index;
+pub mod autocomplete;
+
+pub use dictionary::Dictionary;
+pub use types::*;
+pub use trie::*;
+pub use constants::*;
+pub use parameters::*;
+pub use probe::*;
+pub use string_pool::*;
+pub use index::*;
+pub use autocomplete::*;
\ No newline at end of file
diff --git a/autocomplete-rs/src/main.rs b/autocomplete-rs/src/main.rs
new file mode 100644
index 0000000..751189f
--- /dev/null
+++ b/autocomplete-rs/src/main.rs
@@ -0,0 +1,25 @@
+use std::error::Error;
+use clap::Parser;
+
+/// Autocomplete service
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// Input file path
+    #[arg(short, long)]
+    input: Option<String>,
+}
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn Error>> {
+    let args = Args::parse();
+
+    println!("Autocomplete Service");
+
+    if let Some(input) = args.input {
+        println!("Processing input file: {}", input);
+        // TODO: Implement file processing logic
+    }
+
+    Ok(())
+}
diff --git a/autocomplete-rs/src/parameters.rs b/autocomplete-rs/src/parameters.rs
new file mode 100644
index 0000000..38d5fec
--- /dev/null
+++ b/autocomplete-rs/src/parameters.rs
@@ -0,0 +1,115 @@
+use std::fs::File;
+use std::io::{self, BufRead, BufReader};
+use std::path::Path;
+
+use crate::constants::{MAX_NUM_CHARS_PER_QUERY, MAX_NUM_TERMS_PER_QUERY};
+
+/// Parameters for the autocomplete system
+#[derive(Debug, Default)]
+pub struct Parameters {
+    pub num_terms: u32,
+    pub max_string_length: u32,
+    pub num_completions: u32,
+    pub universe: u32,
+    pub num_levels: u32,
+    pub nodes_per_level: Vec<u32>,
+    pub collection_basename: String,
+}
+
+impl Parameters {
+    /// Creates a new empty Parameters instance
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Loads parameters from a statistics file
+    pub fn load(&mut self) -> io::Result<()> {
+        let stats_path = if self.collection_basename.ends_with(".mapped.stats") {
+            Path::new(&self.collection_basename).to_path_buf()
+        } else {
+            Path::new(&self.collection_basename).with_extension("mapped.stats")
+        };
+
+        let file = File::open(stats_path)?;
+        let reader = BufReader::new(file);
+        let mut lines = reader.lines();
+
+        // Read basic statistics
+        self.num_terms = lines.next()
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing num_terms"))??
+            .parse()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        self.max_string_length = lines.next()
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing max_string_length"))??
+            .parse()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        self.num_completions = lines.next()
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing num_completions"))??
+            .parse()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        self.universe = lines.next()
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing universe"))??
+            .parse()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        self.num_levels = lines.next()
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing num_levels"))??
+            .parse()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+
+        // Validate basic statistics
+        if self.num_terms == 0 {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "num_terms must be > 0"));
+        }
+        if self.max_string_length == 0 {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "max_string_length must be > 0"));
+        }
+        if self.num_completions == 0 {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "num_completions must be > 0"));
+        }
+        if self.universe < self.num_completions {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "universe must be >= num_completions"));
+        }
+        if self.num_levels == 0 {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "num_levels must be > 0"));
+        }
+
+        // Validate against constants
+        if self.max_string_length > MAX_NUM_CHARS_PER_QUERY {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("max_string_length ({}) exceeds MAX_NUM_CHARS_PER_QUERY ({})",
+                        self.max_string_length, MAX_NUM_CHARS_PER_QUERY)
+            ));
+        }
+        if self.num_levels > MAX_NUM_TERMS_PER_QUERY {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("num_levels ({}) exceeds MAX_NUM_TERMS_PER_QUERY ({})",
+                        self.num_levels, MAX_NUM_TERMS_PER_QUERY)
+            ));
+        }
+
+        // Read nodes per level
+        self.nodes_per_level = Vec::with_capacity(self.num_levels as usize);
+        for _ in 0..self.num_levels {
+            let count = lines.next()
+                .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Missing nodes_per_level data"))??
+                .parse()
+                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+            self.nodes_per_level.push(count);
+        }
+
+        if self.nodes_per_level.len() != self.num_levels as usize {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidData,
+                "File with statistics may be truncated or malformed"
+            ));
+        }
+
+        Ok(())
+    }
+}
\ No newline at end of file
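A minimal sketch of driving the loader (the basename below is hypothetical; field names come from the struct above). `load()` resolves the path to `<basename>.mapped.stats` unless the name already carries that suffix:

```rust
use autocomplete_rs::parameters::Parameters;

fn main() -> std::io::Result<()> {
    let mut params = Parameters::new();
    // "data/wiki" is a placeholder; load() opens "data/wiki.mapped.stats".
    params.collection_basename = "data/wiki".to_string();
    params.load()?;
    println!("{} terms, {} levels", params.num_terms, params.num_levels);
    Ok(())
}
```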
diff --git a/autocomplete-rs/src/probe.rs b/autocomplete-rs/src/probe.rs
new file mode 100644
index 0000000..c82f825
--- /dev/null
+++ b/autocomplete-rs/src/probe.rs
@@ -0,0 +1,81 @@
+use std::time::{Duration, Instant};
+
+/// A trait for performance measurement probes
+pub trait Probe {
+    /// Start timing an operation
+    fn start(&mut self, id: u64);
+    /// Stop timing an operation
+    fn stop(&mut self, id: u64);
+}
+
+/// A no-operation probe that does nothing
+#[derive(Debug, Default)]
+pub struct NopProbe;
+
+impl Probe for NopProbe {
+    fn start(&mut self, _id: u64) {}
+    fn stop(&mut self, _id: u64) {}
+}
+
+/// A timer probe that measures operation durations
+#[derive(Debug)]
+pub struct TimerProbe {
+    timers: Vec<Timer>,
+}
+
+#[derive(Debug, Default, Clone)]
+struct Timer {
+    start_time: Option<Instant>,
+    total_duration: Duration,
+}
+
+impl Timer {
+    fn new() -> Self {
+        Self {
+            start_time: None,
+            total_duration: Duration::default(),
+        }
+    }
+
+    fn start(&mut self) {
+        self.start_time = Some(Instant::now());
+    }
+
+    fn stop(&mut self) {
+        if let Some(start) = self.start_time {
+            self.total_duration += start.elapsed();
+            self.start_time = None;
+        }
+    }
+
+    fn get_duration(&self) -> Duration {
+        self.total_duration
+    }
+}
+
+impl TimerProbe {
+    /// Creates a new TimerProbe with the specified number of timers
+    pub fn new(num_timers: u64) -> Self {
+        Self {
+            timers: vec![Timer::new(); num_timers as usize],
+        }
+    }
+
+    /// Gets the total duration for a specific timer
+    pub fn get_duration(&self, id: u64) -> Duration {
+        assert!(id < self.timers.len() as u64, "Timer ID out of bounds");
+        self.timers[id as usize].get_duration()
+    }
+}
+
+impl Probe for TimerProbe {
+    fn start(&mut self, id: u64) {
+        assert!(id < self.timers.len() as u64, "Timer ID out of bounds");
+        self.timers[id as usize].start();
+    }
+
+    fn stop(&mut self, id: u64) {
+        assert!(id < self.timers.len() as u64, "Timer ID out of bounds");
+        self.timers[id as usize].stop();
+    }
+}
\ No newline at end of file
diff --git a/autocomplete-rs/src/string_pool.rs b/autocomplete-rs/src/string_pool.rs
new file mode 100644
index 0000000..332645a
--- /dev/null
+++ b/autocomplete-rs/src/string_pool.rs
@@ -0,0 +1,157 @@
+use crate::types::{ByteRange, IdType};
+
+/// Represents a scored byte range
+#[derive(Debug, Clone)]
+pub struct ScoredByteRange {
+    pub string: ByteRange,
+    pub score: IdType,
+}
+
+/// Manages a pool of scored strings
+pub struct ScoredStringPool {
+    data: Vec<u8>,
+    offsets: Vec<usize>,
+    scores: Vec<f32>,
+}
+
+impl ScoredStringPool {
+    /// Create a new empty string pool
+    pub fn new() -> Self {
+        Self {
+            data: Vec::new(),
+            offsets: vec![0],
+            scores: Vec::new(),
+        }
+    }
+
+    /// Initialize the pool by resetting it to the single sentinel offset
+    pub fn init(&mut self) {
+        self.offsets.clear();
+        self.push_back_offset(0);
+    }
+
+    /// Resize the pool
+    pub fn resize(&mut self, num_bytes: usize, k: u32) {
+        self.scores.resize(k as usize, 0.0);
+        self.data.resize(num_bytes, 0);
+    }
+
+    /// Clear the pool, keeping the leading sentinel offset so size() stays valid
+    pub fn clear(&mut self) {
+        self.offsets.truncate(1);
+    }
+
+    /// Get the number of strings in the pool
+    pub fn size(&self) -> usize {
+        assert!(!self.offsets.is_empty());
+        self.offsets.len() - 1
+    }
+
+    /// Get the total number of bytes used by the pool's buffers
+    pub fn bytes(&self) -> usize {
+        // Count buffer contents, not just the Vec handles
+        self.data.len()
+            + self.offsets.len() * std::mem::size_of::<usize>()
+            + self.scores.len() * std::mem::size_of::<f32>()
+    }
+
+    /// Get a mutable reference to the data
+    pub fn data_mut(&mut self) -> &mut [u8] {
+        &mut self.data
+    }
+
+    /// Add a new offset
+    pub fn push_back_offset(&mut self, offset: usize) {
+        self.offsets.push(offset);
+    }
+
+    /// Get a mutable reference to the scores
+    pub fn scores_mut(&mut self) -> &mut [f32] {
+        &mut self.scores
+    }
+
+    /// Get a reference to the scores
+    pub fn scores(&self) -> &[f32] {
+        &self.scores
+    }
+
+    /// Get a scored byte range at the given index
+    pub fn get(&self, index: usize) -> ByteRange {
+        if index >= self.offsets.len() - 1 {
+            return ByteRange::new(0, 0);
+        }
+        ByteRange::new(
+            self.offsets[index],
+            self.offsets[index + 1]
+        )
+    }
+
+    /// Set the offsets vector
+    pub fn set_offsets(&mut self, offsets: Vec<usize>) {
+        self.offsets = offsets;
+    }
+
+    /// Set the scores vector
+    pub fn set_scores(&mut self, scores: Vec<f32>) {
+        self.scores = scores;
+    }
+
+    /// Set the data vector
+    pub fn set_data(&mut self, data: Vec<u8>) {
+        self.data = data;
+    }
+
+    pub fn get_score(&self, index: usize) -> f32 {
+        self.scores.get(index).copied().unwrap_or(0.0)
+    }
+}
+
+/// Iterator over scored strings in the pool
+pub struct ScoredStringPoolIterator<'a> {
+    pool: &'a ScoredStringPool,
+    pos: usize,
+}
+
+impl<'a> ScoredStringPoolIterator<'a> {
+    /// Create a new iterator
+    pub fn new(pool: &'a ScoredStringPool, pos: usize) -> Self {
+        Self { pool, pos }
+    }
+
+    /// Check if the iterator is empty
+    pub fn empty(&self) -> bool {
+        self.size() == 0
+    }
+
+    /// Get the number of strings
+    pub fn size(&self) -> usize {
+        self.pool.size()
+    }
+
+    /// Get the pool
+    pub fn pool(&self) -> &ScoredStringPool {
+        self.pool
+    }
+}
+
+impl<'a> Iterator for ScoredStringPoolIterator<'a> {
+    type Item = ScoredByteRange;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.pos < self.pool.size() {
+            let item = ScoredByteRange {
+                string: self.pool.get(self.pos),
+                score: self.pool.get_score(self.pos) as IdType,
+            };
+            self.pos += 1;
+            Some(item)
+        } else {
+            None
+        }
+    }
+}
+
+impl ScoredStringPool {
+    /// Get an iterator over the scored strings
+    pub fn iter(&self) -> ScoredStringPoolIterator {
+        ScoredStringPoolIterator::new(self, 0)
+    }
+}
\ No newline at end of file
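A sketch of how the pool's parallel arrays line up (hypothetical data; offsets delimit each string's byte range, and the iterator truncates the `f32` scores to `IdType`):

```rust
use autocomplete_rs::string_pool::ScoredStringPool;

fn main() {
    let mut pool = ScoredStringPool::new();
    // Two strings packed back to back: "hello" at bytes 0..5, "help" at bytes 5..9.
    pool.set_data(b"hellohelp".to_vec());
    pool.set_offsets(vec![0, 5, 9]);
    pool.set_scores(vec![10.0, 8.0]);

    for scored in pool.iter() {
        println!("bytes {}..{} -> score {}",
                 scored.string.start, scored.string.end, scored.score);
    }
}
```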
diff --git a/autocomplete-rs/src/trie.rs b/autocomplete-rs/src/trie.rs
new file mode 100644
index 0000000..05f80e5
--- /dev/null
+++ b/autocomplete-rs/src/trie.rs
@@ -0,0 +1,147 @@
+use std::collections::HashMap;
+use crate::types::IdType;
+
+#[derive(Default, Clone)]
+struct TrieNode {
+    children: HashMap<char, Box<TrieNode>>,
+    id: Option<IdType>,
+    score: f32,
+}
+
+impl TrieNode {
+    fn new() -> Self {
+        Self {
+            children: HashMap::new(),
+            id: None,
+            score: 0.0,
+        }
+    }
+
+    fn is_terminal(&self) -> bool {
+        self.id.is_some()
+    }
+}
+
+#[derive(Clone)]
+pub struct Trie {
+    root: TrieNode,
+}
+
+impl Trie {
+    pub fn new() -> Self {
+        Self {
+            root: TrieNode::new(),
+        }
+    }
+
+    pub fn insert(&mut self, completion: &str, id: IdType, score: f32) {
+        let mut current = &mut self.root;
+        let chars: Vec<char> = completion.chars().collect();
+
+        for &c in &chars {
+            current = current.children
+                .entry(c)
+                .or_insert_with(|| Box::new(TrieNode::new()));
+        }
+
+        current.id = Some(id);
+        current.score = score;
+    }
+
+    pub fn remove(&mut self, completion: &str) -> bool {
+        let mut path = Vec::new();
+        let mut current = &mut self.root;
+
+        // First pass: find the path to the node
+        for c in completion.chars() {
+            if let Some(next) = current.children.get_mut(&c) {
+                path.push(c);
+                current = next;
+            } else {
+                return false; // String not found
+            }
+        }
+
+        // If the node is not a terminal, the string wasn't in the trie
+        if !current.is_terminal() {
+            return false;
+        }
+
+        // Remove the terminal marker
+        current.id = None;
+        current.score = 0.0;
+
+        // Second pass: prune the last node if it is now an empty non-terminal
+        if let Some((&last, parent_path)) = path.split_last() {
+            let mut parent = &mut self.root;
+            for &c in parent_path {
+                parent = parent.children.get_mut(&c).unwrap();
+            }
+            let is_prunable = parent
+                .children
+                .get(&last)
+                .map_or(false, |node| node.children.is_empty() && !node.is_terminal());
+            if is_prunable {
+                parent.children.remove(&last);
+            }
+        }
+
+        true
+    }
+
+    pub fn complete(&self, prefix: &str) -> Vec<(IdType, f32)> {
+        let mut current = &self.root;
+
+        // Navigate to the prefix node
+        for c in prefix.chars() {
+            if let Some(next) = current.children.get(&c) {
+                current = next;
+            } else {
+                return Vec::new(); // Prefix not found
+            }
+        }
+
+        // Collect all completions from this node
+        let mut results = Vec::new();
+        self.collect_completions(current, &mut results);
+        results
+    }
+
+    fn collect_completions(&self, node: &TrieNode, results: &mut Vec<(IdType, f32)>) {
+        if let Some(id) = node.id {
+            results.push((id, node.score));
+        }
+
+        for child in node.children.values() {
+            self.collect_completions(child, results);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_trie_insert_and_complete() {
+        let mut trie = Trie::new();
+        trie.insert("hello", 1, 1.0);
+        trie.insert("help", 2, 0.8);
+        trie.insert("world", 3, 0.5);
+
+        let completions = trie.complete("hel");
+        assert_eq!(completions.len(), 2);
+        assert!(completions.contains(&(1, 1.0)));
+        assert!(completions.contains(&(2, 0.8)));
+    }
+
+    #[test]
+    fn test_trie_remove() {
+        let mut trie = Trie::new();
+        trie.insert("hello", 1, 1.0);
+        trie.insert("help", 2, 0.8);
+
+        assert!(trie.remove("hello"));
+        assert!(!trie.remove("hello")); // Already removed
+        assert!(trie.remove("help"));
+
+        let completions = trie.complete("hel");
+        assert_eq!(completions.len(), 0);
+    }
+}
\ No newline at end of file
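`complete` gathers matches in traversal order rather than by rank, so callers sort by score themselves; a minimal sketch:

```rust
use autocomplete_rs::trie::Trie;

fn main() {
    let mut trie = Trie::new();
    trie.insert("hello", 1, 1.0);
    trie.insert("help", 2, 0.8);
    trie.insert("hell", 3, 0.6);

    // Rank hits by score, highest first.
    let mut hits = trie.complete("hel");
    hits.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    assert_eq!(hits[0], (1, 1.0));
}
```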
diff --git a/autocomplete-rs/src/types.rs b/autocomplete-rs/src/types.rs
new file mode 100644
index 0000000..cbd9316
--- /dev/null
+++ b/autocomplete-rs/src/types.rs
@@ -0,0 +1,104 @@
+/// Type alias for document and term IDs
+pub type IdType = u32;
+
+/// Type alias for completion type (vector of term IDs)
+pub type CompletionType = Vec<IdType>;
+
+/// Type alias for score type
+pub type ScoreType = f32;
+
+/// Represents a range of values
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ValueRange {
+    pub begin: u64,
+    pub end: u64,
+}
+
+impl ValueRange {
+    /// Check if the range is invalid
+    pub fn is_invalid(&self) -> bool {
+        self.begin == u64::MAX || self.end == u64::MAX || self.begin > self.end
+    }
+
+    /// Check if the range is valid
+    pub fn is_valid(&self) -> bool {
+        !self.is_invalid()
+    }
+
+    /// Check if a value is contained in the range
+    pub fn contains(&self, val: u64) -> bool {
+        val >= self.begin && val <= self.end
+    }
+}
+
+/// Represents a scored range
+#[derive(Debug, Clone)]
+pub struct ScoredRange {
+    pub range: ValueRange,
+    pub min_pos: u32,
+    pub min_val: IdType,
+}
+
+impl ScoredRange {
+    /// Compare two scored ranges
+    pub fn greater(l: &ScoredRange, r: &ScoredRange) -> bool {
+        l.min_val > r.min_val
+    }
+}
+
+/// Represents a byte range
+#[derive(Debug, Clone, Copy)]
+pub struct ByteRange {
+    pub start: usize,
+    pub end: usize,
+}
+
+impl ByteRange {
+    pub fn new(start: usize, end: usize) -> Self {
+        Self { start, end }
+    }
+
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+}
+
+/// Represents a range of 32-bit integers
+#[derive(Debug, Clone, Copy)]
+pub struct Uint32Range {
+    pub begin: *const u32,
+    pub end: *const u32,
+}
+
+/// Global constants
+pub mod global {
+    use super::IdType;
+
+    /// Invalid term ID
+    pub const INVALID_TERM_ID: IdType = IdType::MAX;
+
+    /// Terminator value
+    pub const TERMINATOR: IdType = 0;
+
+    /// Not found value
+    pub const NOT_FOUND: u64 = u64::MAX;
+
+    /// Linear scan threshold
+    pub const LINEAR_SCAN_THRESHOLD: u64 = 8;
+}
+
+/// Convert a string to a byte range
+pub fn string_to_byte_range(s: &str) -> ByteRange {
+    ByteRange {
+        start: 0,
+        end: s.len(),
+    }
+}
+
+/// Convert a completion to a uint32 range
+pub fn completion_to_uint32_range(c: &CompletionType) -> Uint32Range {
+    Uint32Range {
+        begin: c.as_ptr(),
+        end: unsafe { c.as_ptr().add(c.len()) },
+    }
+}
\ No newline at end of file
diff --git a/autocomplete-rs/tests/constants_tests.rs b/autocomplete-rs/tests/constants_tests.rs
new file mode 100644
index 0000000..94123cc
--- /dev/null
+++ b/autocomplete-rs/tests/constants_tests.rs
@@ -0,0 +1,21 @@
+use autocomplete_rs::constants::*;
+
+#[test]
+fn test_constants() {
+    // Test MAX_K
+    assert!(MAX_K > 0, "MAX_K should be positive");
+    assert!(MAX_K <= 100, "MAX_K should be reasonably small");
+
+    // Test MAX_NUM_TERMS_PER_QUERY
+    assert!(MAX_NUM_TERMS_PER_QUERY > 0, "MAX_NUM_TERMS_PER_QUERY should be positive");
+    assert!(MAX_NUM_TERMS_PER_QUERY < 256, "MAX_NUM_TERMS_PER_QUERY must be < 256");
+
+    // Test MAX_NUM_CHARS_PER_QUERY
+    assert!(MAX_NUM_CHARS_PER_QUERY > 0, "MAX_NUM_CHARS_PER_QUERY should be positive");
+    assert!(MAX_NUM_CHARS_PER_QUERY >= MAX_K, "MAX_NUM_CHARS_PER_QUERY should be >= MAX_K");
+
+    // Test POOL_SIZE
+    assert!(POOL_SIZE > 0, "POOL_SIZE should be positive");
+    assert_eq!(POOL_SIZE, (MAX_K as usize) * (MAX_NUM_CHARS_PER_QUERY as usize),
+               "POOL_SIZE should be MAX_K * MAX_NUM_CHARS_PER_QUERY");
+}
\ No newline at end of file
diff --git a/autocomplete-rs/tests/dictionary_tests.rs
b/autocomplete-rs/tests/dictionary_tests.rs new file mode 100644 index 0000000..1aab6d8 --- /dev/null +++ b/autocomplete-rs/tests/dictionary_tests.rs @@ -0,0 +1,119 @@ +use autocomplete_rs::dictionary::Dictionary; +use autocomplete_rs::types::IdType; + +#[test] +fn test_dictionary_new() { + let dict = Dictionary::new(); + assert!(dict.is_empty()); + assert_eq!(dict.len(), 0); +} + +#[test] +fn test_dictionary_insert() { + let mut dict = Dictionary::new(); + + // Test first insertion + let id1 = dict.insert("hello".to_string()); + assert_eq!(id1, 0); + assert_eq!(dict.len(), 1); + + // Test duplicate insertion + let id2 = dict.insert("hello".to_string()); + assert_eq!(id2, id1); + assert_eq!(dict.len(), 1); + + // Test new insertion + let id3 = dict.insert("world".to_string()); + assert_eq!(id3, 1); + assert_eq!(dict.len(), 2); +} + +#[test] +fn test_dictionary_get() { + let mut dict = Dictionary::new(); + + // Insert test data + let id1 = dict.insert("hello".to_string()); + let id2 = dict.insert("world".to_string()); + + // Test valid gets + assert_eq!(dict.get(id1), Some("hello")); + assert_eq!(dict.get(id2), Some("world")); + + // Test invalid id + assert_eq!(dict.get(999), None); +} + +#[test] +fn test_dictionary_get_id() { + let mut dict = Dictionary::new(); + + // Insert test data + let id1 = dict.insert("hello".to_string()); + let id2 = dict.insert("world".to_string()); + + // Test valid gets + assert_eq!(dict.get_id("hello"), Some(id1)); + assert_eq!(dict.get_id("world"), Some(id2)); + + // Test non-existent string + assert_eq!(dict.get_id("nonexistent"), None); +} + +#[test] +fn test_dictionary_len_and_empty() { + let mut dict = Dictionary::new(); + + // Test empty state + assert!(dict.is_empty()); + assert_eq!(dict.len(), 0); + + // Test after insertions + dict.insert("hello".to_string()); + assert!(!dict.is_empty()); + assert_eq!(dict.len(), 1); + + dict.insert("world".to_string()); + assert!(!dict.is_empty()); + assert_eq!(dict.len(), 2); + + // Test duplicate insertion doesn't change length + dict.insert("hello".to_string()); + assert_eq!(dict.len(), 2); +} + +#[test] +fn test_dictionary_id_sequence() { + let mut dict = Dictionary::new(); + + // Test that IDs are assigned sequentially + let id1 = dict.insert("first".to_string()); + let id2 = dict.insert("second".to_string()); + let id3 = dict.insert("third".to_string()); + + assert_eq!(id1, 0); + assert_eq!(id2, 1); + assert_eq!(id3, 2); +} + +#[test] +fn test_dictionary_large_insertions() { + let mut dict = Dictionary::new(); + let num_insertions = 1000; + + // Insert many strings + for i in 0..num_insertions { + let s = format!("string_{}", i); + let id = dict.insert(s); + assert_eq!(id, i as IdType); + } + + assert_eq!(dict.len(), num_insertions); + + // Verify all strings can be retrieved + for i in 0..num_insertions { + let s = format!("string_{}", i); + assert_eq!(dict.get(i as IdType), Some(s.as_str())); + assert_eq!(dict.get_id(&s), Some(i as IdType)); + } +} \ No newline at end of file diff --git a/autocomplete-rs/tests/parameters_tests.rs b/autocomplete-rs/tests/parameters_tests.rs new file mode 100644 index 0000000..2bd6762 --- /dev/null +++ b/autocomplete-rs/tests/parameters_tests.rs @@ -0,0 +1,98 @@ +use std::fs::File; +use std::io::Write; +use std::path::Path; +use tempfile::NamedTempFile; +use autocomplete_rs::parameters::Parameters; +use autocomplete_rs::constants::{MAX_NUM_CHARS_PER_QUERY, MAX_NUM_TERMS_PER_QUERY}; + +fn create_test_stats_file() -> NamedTempFile { + let mut file = NamedTempFile::new().unwrap(); 
+ writeln!(file, "1000").unwrap(); // num_terms + writeln!(file, "50").unwrap(); // max_string_length + writeln!(file, "500").unwrap(); // num_completions + writeln!(file, "1000").unwrap(); // universe + writeln!(file, "3").unwrap(); // num_levels + writeln!(file, "100").unwrap(); // nodes_per_level[0] + writeln!(file, "200").unwrap(); // nodes_per_level[1] + writeln!(file, "300").unwrap(); // nodes_per_level[2] + file +} + +#[test] +fn test_parameters_load_valid() { + let test_file = create_test_stats_file(); + let mut params = Parameters::new(); + let path = test_file.path().to_str().unwrap().to_string(); + println!("Test file path: {}", path); + params.collection_basename = path; + + match params.load() { + Ok(_) => println!("Load succeeded"), + Err(e) => println!("Load failed: {}", e), + } + + assert!(params.load().is_ok()); + assert_eq!(params.num_terms, 1000); + assert_eq!(params.max_string_length, 50); + assert_eq!(params.num_completions, 500); + assert_eq!(params.universe, 1000); + assert_eq!(params.num_levels, 3); + assert_eq!(params.nodes_per_level, vec![100, 200, 300]); +} + +#[test] +fn test_parameters_load_invalid_file() { + let mut params = Parameters::new(); + params.collection_basename = "nonexistent_file".to_string(); + assert!(params.load().is_err()); +} + +#[test] +fn test_parameters_load_invalid_data() { + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "0").unwrap(); // invalid num_terms + writeln!(file, "50").unwrap(); + writeln!(file, "500").unwrap(); + writeln!(file, "1000").unwrap(); + writeln!(file, "3").unwrap(); + writeln!(file, "100").unwrap(); + writeln!(file, "200").unwrap(); + writeln!(file, "300").unwrap(); + + let mut params = Parameters::new(); + params.collection_basename = file.path().to_str().unwrap().to_string(); + assert!(params.load().is_err()); +} + +#[test] +fn test_parameters_load_invalid_constants() { + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "1000").unwrap(); + writeln!(file, "{}", MAX_NUM_CHARS_PER_QUERY + 1).unwrap(); // exceeds MAX_NUM_CHARS_PER_QUERY + writeln!(file, "500").unwrap(); + writeln!(file, "1000").unwrap(); + writeln!(file, "3").unwrap(); + writeln!(file, "100").unwrap(); + writeln!(file, "200").unwrap(); + writeln!(file, "300").unwrap(); + + let mut params = Parameters::new(); + params.collection_basename = file.path().to_str().unwrap().to_string(); + assert!(params.load().is_err()); +} + +#[test] +fn test_parameters_load_truncated() { + let mut file = NamedTempFile::new().unwrap(); + writeln!(file, "1000").unwrap(); + writeln!(file, "50").unwrap(); + writeln!(file, "500").unwrap(); + writeln!(file, "1000").unwrap(); + writeln!(file, "3").unwrap(); + writeln!(file, "100").unwrap(); + // Missing nodes_per_level entries + + let mut params = Parameters::new(); + params.collection_basename = file.path().to_str().unwrap().to_string(); + assert!(params.load().is_err()); +} \ No newline at end of file diff --git a/autocomplete-rs/tests/probe_tests.rs b/autocomplete-rs/tests/probe_tests.rs new file mode 100644 index 0000000..7e869e1 --- /dev/null +++ b/autocomplete-rs/tests/probe_tests.rs @@ -0,0 +1,79 @@ +use std::thread; +use std::time::Duration; +use autocomplete_rs::probe::{Probe, NopProbe, TimerProbe}; + +#[test] +fn test_nop_probe() { + let mut probe = NopProbe; + // These should not panic + probe.start(0); + probe.stop(0); +} + +#[test] +fn test_timer_probe_single() { + let mut probe = TimerProbe::new(1); + + probe.start(0); + thread::sleep(Duration::from_millis(100)); + probe.stop(0); + + 
let duration = probe.get_duration(0);
+    assert!(duration >= Duration::from_millis(100));
+}
+
+#[test]
+fn test_timer_probe_multiple() {
+    let mut probe = TimerProbe::new(3);
+
+    // Timer 0
+    probe.start(0);
+    thread::sleep(Duration::from_millis(100));
+    probe.stop(0);
+
+    // Timer 1
+    probe.start(1);
+    thread::sleep(Duration::from_millis(200));
+    probe.stop(1);
+
+    // Timer 2
+    probe.start(2);
+    thread::sleep(Duration::from_millis(300));
+    probe.stop(2);
+
+    assert!(probe.get_duration(0) >= Duration::from_millis(100));
+    assert!(probe.get_duration(1) >= Duration::from_millis(200));
+    assert!(probe.get_duration(2) >= Duration::from_millis(300));
+}
+
+#[test]
+fn test_timer_probe_accumulation() {
+    let mut probe = TimerProbe::new(1);
+
+    // First interval
+    probe.start(0);
+    thread::sleep(Duration::from_millis(100));
+    probe.stop(0);
+
+    // Second interval
+    probe.start(0);
+    thread::sleep(Duration::from_millis(100));
+    probe.stop(0);
+
+    let duration = probe.get_duration(0);
+    assert!(duration >= Duration::from_millis(200));
+}
+
+#[test]
+#[should_panic(expected = "Timer ID out of bounds")]
+fn test_timer_probe_invalid_id() {
+    let mut probe = TimerProbe::new(1);
+    probe.start(1); // Should panic as we only have timer 0
+}
+
+#[test]
+#[should_panic(expected = "Timer ID out of bounds")]
+fn test_timer_probe_get_invalid_id() {
+    let probe = TimerProbe::new(1);
+    probe.get_duration(1); // Should panic as we only have timer 0
+}
\ No newline at end of file
diff --git a/benchmark/benchmark_common.hpp b/benchmark/benchmark_common.hpp
deleted file mode 100644
index 0fdae98..0000000
--- a/benchmark/benchmark_common.hpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#pragma once
-
-namespace autocomplete {
-
-static const uint32_t runs = 5;
-
-size_t load_queries(std::vector<std::string>& queries, uint32_t max_num_queries,
-                    float percentage, std::istream& is = std::cin) {
-    assert(percentage >= 0.0 and percentage <= 1.0);
-    std::string line;
-    queries.reserve(max_num_queries);
-    for (uint32_t i = 0; i != max_num_queries; ++i) {
-        if (!std::getline(is, line)) break;
-
-        auto query = line.substr(line.find(' ') + 1, line.size());
-        int32_t size = query.size() - 1;
-        while (size >= 0 and query[size] != ' ') --size;
-        auto last_token = query.substr(size + 1, query.size() - size);
-        uint32_t num_chars =
-            last_token.size() - std::ceil(last_token.size() * percentage);
-        char first = last_token.front();
-        for (uint32_t i = 0; i != num_chars; ++i) last_token.pop_back();
-
-        // retain at least one char
-        if (last_token.empty()) last_token.push_back(first);
-        assert(last_token.size() > 0);
-
-        queries.push_back(query.substr(0, size + 1) + last_token);
-    }
-    return queries.size();
-}
-
-} // namespace autocomplete
\ No newline at end of file
diff --git a/benchmark/benchmark_conjunctive_topk.cpp b/benchmark/benchmark_conjunctive_topk.cpp
deleted file mode 100644
index 2a04c4c..0000000
--- a/benchmark/benchmark_conjunctive_topk.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-#include "statistics.hpp"
-#include "benchmark_common.hpp"
-
-using namespace autocomplete;
-
-template <typename Index>
-void benchmark_conjunctive_topk(char const* binary_filename, uint32_t k,
-                                uint32_t max_num_queries,
-                                essentials::json_lines& breakdowns,
-                                bool breakdown) {
-    Index autocomp;
-    essentials::logger("loading data structure from disk...");
-    essentials::load(autocomp, binary_filename);
-    essentials::logger("DONE");
-    autocomp.print_stats();
-
-    std::vector<std::string> queries;
-    essentials::logger("loading queries...");
-    uint32_t num_queries =
-        load_queries(queries, max_num_queries, 0.25, std::cin);
-    essentials::logger("loaded " + std::to_string(num_queries) + " queries");
-
-    auto ns_x_query = [&](double time) {
-        return uint64_t(time / (runs * num_queries) * 1000);
-    };
-
-    essentials::logger("benchmarking conjunctive_topk queries...");
-    uint64_t reported_strings = 0;
-
-    if (breakdown) {
-        std::vector<essentials::timer_type> timers(4);
-        for (uint32_t run = 0; run != runs; ++run) {
-            for (auto const& query : queries) {
-                auto it = autocomp.conjunctive_topk(query, k, timers);
-                reported_strings += it.size();
-            }
-        }
-        essentials::logger("DONE");
-        std::cout << reported_strings << std::endl;
-        breakdowns.add("num_queries", std::to_string(num_queries));
-        breakdowns.add("parsing_ns_per_query",
-                       std::to_string(ns_x_query(timers[0].elapsed())));
-        breakdowns.add("dictionary_search_ns_per_query",
-                       std::to_string(ns_x_query(timers[1].elapsed())));
-        breakdowns.add("conjunctive_search_ns_per_query",
-                       std::to_string(ns_x_query(timers[2].elapsed())));
-        breakdowns.add("reporting_ns_per_query",
-                       std::to_string(ns_x_query(timers[3].elapsed())));
-    } else {
-        essentials::timer_type timer;
-        timer.start();
-        for (uint32_t run = 0; run != runs; ++run) {
-            for (auto const& query : queries) {
-                auto it = autocomp.conjunctive_topk(query, k);
-                reported_strings += it.size();
-            }
-        }
-        timer.stop();
-        essentials::logger("DONE");
-        std::cout << reported_strings << std::endl;
-        breakdowns.add("num_queries", std::to_string(num_queries));
-        breakdowns.add("ns_per_query",
-                       std::to_string(ns_x_query(timer.elapsed())));
-    }
-}
-
-int main(int argc, char** argv) {
-    int mandatory = 5;
-    if (argc < mandatory + 1) {
-        std::cout << argv[0]
-                  << " <type> <k> <binary_filename> <num_terms_per_query> <max_num_queries>"
-                     " --breakdown < queries"
-                  << std::endl;
-        return 1;
-    }
-
-    std::string type(argv[1]);
-    uint32_t k = std::atoi(argv[2]);
-    char const* binary_filename = argv[3];
-    std::string num_terms_per_query(argv[4]);
-    uint32_t max_num_queries = std::atoi(argv[5]);
-
-    bool breakdown = false;
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "--breakdown") {
-            breakdown = true;
-        }
-    }
-
-    essentials::json_lines breakdowns;
-    breakdowns.new_line();
-    breakdowns.add("num_terms_per_query", num_terms_per_query);
-
-    if (type == "type1") {
-        benchmark_conjunctive_topk<autocomplete_type1>(
-            binary_filename, k, max_num_queries, breakdowns, breakdown);
-    } else if (type == "type2") {
-        benchmark_conjunctive_topk<autocomplete_type2>(
-            binary_filename, k, max_num_queries, breakdowns, breakdown);
-    } else if (type == "type3") {
-        benchmark_conjunctive_topk<autocomplete_type3>(
-            binary_filename, k, max_num_queries, breakdowns, breakdown);
-    } else {
-        std::cout << "error: unknown type '" << type << "'" << std::endl;
-        return 1;
-    }
-
-    breakdowns.print();
-    return 0;
-}
\ No newline at end of file
diff --git a/benchmark/benchmark_prefix_topk.cpp b/benchmark/benchmark_prefix_topk.cpp
deleted file mode 100644
index 2149e03..0000000
--- a/benchmark/benchmark_prefix_topk.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-#include "statistics.hpp"
-#include "benchmark_common.hpp"
-
-using namespace autocomplete;
-
-template <typename Index>
-void benchmark_prefix_topk(char const* binary_filename, uint32_t k,
-                           uint32_t max_num_queries,
-                           essentials::json_lines& breakdowns, bool breakdown) {
-    Index autocomp;
-    essentials::logger("loading data structure from disk...");
-    essentials::load(autocomp, binary_filename);
-    essentials::logger("DONE");
-    autocomp.print_stats();
-
-    std::vector<std::string> queries;
-    essentials::logger("loading queries...");
-    uint32_t num_queries =
load_queries(queries, max_num_queries, 0.25, std::cin); - essentials::logger("loaded " + std::to_string(num_queries) + " queries"); - - auto ns_x_query = [&](double time) { - return uint64_t(time / (runs * num_queries) * 1000); - }; - - essentials::logger("benchmarking prefix_topk queries..."); - uint64_t reported_strings = 0; - - if (breakdown) { - std::vector timers(4); - for (uint32_t run = 0; run != runs; ++run) { - for (auto const& query : queries) { - auto it = autocomp.prefix_topk(query, k, timers); - reported_strings += it.size(); - } - } - essentials::logger("DONE"); - std::cout << reported_strings << std::endl; - breakdowns.add("num_queries", std::to_string(num_queries)); - breakdowns.add("parsing_ns_per_query", - std::to_string(ns_x_query(timers[0].elapsed()))); - breakdowns.add("completions_search_ns_per_query", - std::to_string(ns_x_query(timers[1].elapsed()))); - breakdowns.add("topk_rmq_ns_per_query", - std::to_string(ns_x_query(timers[2].elapsed()))); - breakdowns.add("reporting_ns_per_query", - std::to_string(ns_x_query(timers[3].elapsed()))); - } else { - essentials::timer_type timer; - timer.start(); - for (uint32_t run = 0; run != runs; ++run) { - for (auto const& query : queries) { - auto it = autocomp.prefix_topk(query, k); - reported_strings += it.size(); - } - } - timer.stop(); - essentials::logger("DONE"); - std::cout << reported_strings << std::endl; - breakdowns.add("num_queries", std::to_string(num_queries)); - breakdowns.add("ns_per_query", - std::to_string(ns_x_query(timer.elapsed()))); - } -} - -int main(int argc, char** argv) { - int mandatory = 5; - if (argc < mandatory + 1) { - std::cout << argv[0] - << " " - " --breakdown < queries" - << std::endl; - return 1; - } - - std::string type(argv[1]); - uint32_t k = std::atoi(argv[2]); - char const* binary_filename = argv[3]; - std::string num_terms_per_query(argv[4]); - uint32_t max_num_queries = std::atoi(argv[5]); - - bool breakdown = false; - for (int i = mandatory + 1; i != argc; ++i) { - if (std::string(argv[i]) == "--breakdown") { - breakdown = true; - } - } - - essentials::json_lines breakdowns; - breakdowns.new_line(); - breakdowns.add("num_terms_per_query", num_terms_per_query); - - if (type == "type1") { - benchmark_prefix_topk( - binary_filename, k, max_num_queries, breakdowns, breakdown); - } else if (type == "type2") { - benchmark_prefix_topk( - binary_filename, k, max_num_queries, breakdowns, breakdown); - } else { - std::cout << "error: unknown type '" << type << "'" << std::endl; - return 1; - } - - breakdowns.print(); - return 0; -} \ No newline at end of file diff --git a/benchmark/benchmark_topk.cpp b/benchmark/benchmark_topk.cpp deleted file mode 100644 index a294afe..0000000 --- a/benchmark/benchmark_topk.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include - -#include "types.hpp" -#include "statistics.hpp" -#include "benchmark_common.hpp" - -using namespace autocomplete; - -template -void benchmark_topk(char const* binary_filename, uint32_t k, - uint32_t max_num_queries, float keep, - essentials::json_lines& breakdowns, bool breakdown) { - Index index; - essentials::load(index, binary_filename); - - std::vector queries; - uint32_t num_queries = - load_queries(queries, max_num_queries, keep, std::cin); - - uint64_t reported_strings = 0; - auto musec_per_query = [&](double time) { - return time / (runs * num_queries); - }; - - breakdowns.add("num_queries", std::to_string(num_queries)); - - if (breakdown) { - std::vector timers(4); - for (uint32_t run = 0; run != runs; ++run) { - for (auto const& 
query : queries) { - auto it = index.topk(query, k, timers); - reported_strings += it.size(); - } - } - - std::cout << reported_strings << std::endl; - - breakdowns.add("parsing_musec_per_query", - std::to_string(musec_per_query(timers[0].elapsed()))); - breakdowns.add("prefix_search_musec_per_query", - std::to_string(musec_per_query(timers[1].elapsed()))); - breakdowns.add("conjunctive_search_musec_per_query", - std::to_string(musec_per_query(timers[2].elapsed()))); - breakdowns.add("reporting_musec_per_query", - std::to_string(musec_per_query(timers[3].elapsed()))); - - } else { - essentials::timer_type timer; - timer.start(); - for (uint32_t run = 0; run != runs; ++run) { - for (auto const& query : queries) { - auto it = index.topk(query, k); - reported_strings += it.size(); - } - } - timer.stop(); - - std::cout << reported_strings << std::endl; - - breakdowns.add("musec_per_query", - std::to_string(musec_per_query(timer.elapsed()))); - } -} - -int main(int argc, char** argv) { - int mandatory = 6; - if (argc < mandatory + 1) { - std::cout << argv[0] - << " " - " [--breakdown] < queries" - << std::endl; - std::cout << " is a float in [0,1] and specifies how much " - "we keep of the last token in a query " - << std::endl; - return 1; - } - - std::string type(argv[1]); - uint32_t k = std::atoi(argv[2]); - char const* binary_filename = argv[3]; - std::string num_terms_per_query(argv[4]); - uint32_t max_num_queries = std::atoi(argv[5]); - float keep = std::atof(argv[6]); - - bool breakdown = false; - for (int i = mandatory + 1; i != argc; ++i) { - if (std::string(argv[i]) == "--breakdown") { - breakdown = true; - } - } - - essentials::json_lines breakdowns; - breakdowns.new_line(); - breakdowns.add("num_terms_per_query", num_terms_per_query); - breakdowns.add("percentage", std::to_string(keep)); - - if (type == "ef_type1") { - benchmark_topk( - binary_filename, k, max_num_queries, keep, breakdowns, breakdown); - } else if (type == "ef_type2") { - benchmark_topk( - binary_filename, k, max_num_queries, keep, breakdowns, breakdown); - } else if (type == "ef_type3") { - benchmark_topk( - binary_filename, k, max_num_queries, keep, breakdowns, breakdown); - } else if (type == "ef_type4") { - benchmark_topk( - binary_filename, k, max_num_queries, keep, breakdowns, breakdown); - } else { - return 1; - } - - breakdowns.print(); - return 0; -} \ No newline at end of file diff --git a/doc/activity_diagram.md b/doc/activity_diagram.md new file mode 100644 index 0000000..993101b --- /dev/null +++ b/doc/activity_diagram.md @@ -0,0 +1,157 @@ +# Activity Diagrams + +This document provides activity diagrams for the main workflows in the autocomplete system. 
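As a compact companion to the diagrams that follow, here is a hypothetical Rust sketch of the query path shown in the "Autocomplete Query Processing" diagram: parse, prefix lookup, score, sort, truncate to top-k. Every name below is an illustrative placeholder, not the codebase's API.

```rust
/// Placeholder for the real completion trie; `completions` returns
/// scored candidates for a prefix, or None when the prefix is absent.
struct Trie;

impl Trie {
    fn completions(&self, _prefix: &str) -> Option<Vec<(String, f32)>> {
        None // stub: the real trie walks its nodes here
    }
}

fn autocomplete(trie: &Trie, query: &str, k: usize) -> Vec<(String, f32)> {
    let prefix = query.trim(); // "Parse Query Terms"
    let Some(mut scored) = trie.completions(prefix) else {
        return Vec::new(); // "No Prefix" branch: return empty results
    };
    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); // "Sort by Score"
    scored.truncate(k); // "Return Top-K Results"
    scored
}
```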
+ +## System Initialization and Index Building + +```mermaid +graph TD + Start([Start]) --> LoadParams[Load Parameters] + LoadParams --> InitComponents[Initialize Components] + InitComponents --> BuildTrie[Build Completion Trie] + BuildTrie --> BuildDict[Build Front-Coded Dictionary] + BuildDict --> BuildIndex[Build Inverted Index] + BuildIndex --> BuildForwardIndex[Build Forward Index] + BuildForwardIndex --> End([End]) + + style Start fill:#f9f,stroke:#333,stroke-width:2px + style End fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Autocomplete Query Processing + +```mermaid +graph TD + Start([Start]) --> InputQuery[Input Query] + InputQuery --> ParseQuery[Parse Query Terms] + ParseQuery --> CheckPrefix[Check Prefix in Trie] + + CheckPrefix -->|Prefix Found| GetCompletions[Get Completions] + CheckPrefix -->|No Prefix| ReturnEmpty[Return Empty Results] + + GetCompletions --> ScoreCompletions[Score Completions] + ScoreCompletions --> SortResults[Sort by Score] + SortResults --> ReturnResults[Return Top-K Results] + + ReturnEmpty --> End([End]) + ReturnResults --> End + + style Start fill:#f9f,stroke:#333,stroke-width:2px + style End fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Search Operation Flow + +```mermaid +graph TD + Start([Start]) --> InputTerms[Input Search Terms] + InputTerms --> ParseTerms[Parse Search Terms] + ParseTerms --> LookupTerms[Lookup Terms in Dictionary] + + LookupTerms -->|All Terms Found| GetPostings[Get Posting Lists] + LookupTerms -->|Terms Not Found| ReturnEmpty[Return Empty Results] + + GetPostings --> IntersectLists[Intersect Posting Lists] + IntersectLists --> ScoreDocs[Score Documents] + ScoreDocs --> SortResults[Sort by Score] + SortResults --> ReturnResults[Return Top-K Results] + + ReturnEmpty --> End([End]) + ReturnResults --> End + + style Start fill:#f9f,stroke:#333,stroke-width:2px +``` + +## String Pool Management + +```mermaid +graph TD + Start([Start]) --> CheckCapacity[Check Pool Capacity] + CheckCapacity -->|Full| RemoveLowest[Remove Lowest Score] + CheckCapacity -->|Space Available| AddString[Add New String] + + RemoveLowest --> AddString + AddString --> UpdateScores[Update Scores] + UpdateScores --> SortPool[Sort Pool by Score] + SortPool --> End([End]) + + style Start fill:#f9f,stroke:#333,stroke-width:2px + style End fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Blocked Inverted Index Operations + +```mermaid +graph TD + Start([Start]) --> InputDoc[Input Document] + InputDoc --> ExtractTerms[Extract Terms] + ExtractTerms --> CheckBlocks[Check Existing Blocks] + + CheckBlocks -->|Block Found| UpdateBlock[Update Block] + CheckBlocks -->|New Block| CreateBlock[Create New Block] + + UpdateBlock --> MergeCheck[Check Merge Condition] + CreateBlock --> MergeCheck + + MergeCheck -->|Merge Needed| MergeBlocks[Merge Blocks] + MergeCheck -->|No Merge| UpdateIndex[Update Index] + + MergeBlocks --> UpdateIndex + UpdateIndex --> End([End]) + + style Start fill:#f9f,stroke:#333,stroke-width:2px + style End fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Performance Measurement Flow + +```mermaid +graph TD + Start([Start]) --> StartTimer[Start Timer] + StartTimer --> Operation[Perform Operation] + Operation --> StopTimer[Stop Timer] + StopTimer --> RecordMetrics[Record Metrics] + RecordMetrics --> AnalyzePerformance[Analyze Performance] + AnalyzePerformance --> End([End]) + + style Start fill:#f9f,stroke:#333,stroke-width:2px + style End fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Key Operations Description + +### System Initialization +1. 
Load configuration parameters +2. Initialize core components +3. Build data structures +4. Set up indexes + +### Query Processing +1. Parse and validate input +2. Check prefix in trie +3. Retrieve and score completions +4. Sort and return results + +### Search Operations +1. Process search terms +2. Lookup in dictionary +3. Retrieve and intersect posting lists +4. Score and rank results + +### String Pool Management +1. Maintain fixed-size pool +2. Handle insertions and removals +3. Update and sort scores +4. Manage memory efficiently + +### Blocked Index Operations +1. Process document updates +2. Manage block structure +3. Handle block merges +4. Maintain index consistency + +### Performance Measurement +1. Track operation timing +2. Record performance metrics +3. Analyze system behavior +4. Optimize based on results \ No newline at end of file diff --git a/doc/class_diagram.md b/doc/class_diagram.md new file mode 100644 index 0000000..4a5f4db --- /dev/null +++ b/doc/class_diagram.md @@ -0,0 +1,333 @@ +# C++ Class Diagram + +This document provides a comprehensive view of all classes in the C++ implementation and their relationships. + +## Main Class Diagram + +```mermaid +classDiagram + class Parameters { + +uint32_t num_terms + +uint32_t max_string_length + +uint32_t num_completions + +uint32_t universe + +uint32_t num_levels + +vector~uint32_t~ nodes_per_level + +string collection_basename + +load() + } + + class Probe { + <> + +start(id: uint64_t) + +stop(id: uint64_t) + } + + class NopProbe { + +start(id: uint64_t) + +stop(id: uint64_t) + } + + class TimerProbe { + -vector~Timer~ timers + +start(id: uint64_t) + +stop(id: uint64_t) + +get_duration(id: uint64_t) + } + + class Timer { + -Instant start_time + -Duration total_duration + +start() + +stop() + +get_duration() + } + + class ScoredStringPool { + -vector~id_type~ m_scores + -vector~size_t~ m_offsets + -vector~uint8_t~ m_data + +init() + +resize(size_t, uint32_t) + +clear() + +size() + +bytes() + +data() + +push_back_offset(size_t) + +scores() + +const_scores() + } + + class ScoredByteRange { + +byte_range string + +id_type score + } + + class TrieNode { + -unordered_map~char, TrieNode*~ children + -bool is_terminal + -vector~uint32_t~ completion_ids + +add_child(char) + +get_child(char) + +is_terminal() + } + + class CompletionTrie { + -TrieNode* root + -size_t num_nodes + -size_t num_completions + +insert(string) + +complete(string) + +remove(string) + +clear() + } + + class FCDictionary { + -vector~char~ data + -vector~uint32_t~ offsets + -size_t num_strings + -size_t total_size + +build(vector~string~) + +lookup(uint32_t) + +compress() + +decompress(uint32_t) + } + + class IntegerFCDictionary { + -vector~uint32_t~ m_headers + -vector~uint8_t~ m_buckets + -size_t m_size + +build(vector~string~) + +lookup(uint32_t) + +extract(id_type, completion_type) + } + + class Block { + -vector~uint32_t~ doc_ids + -uint32_t min_doc_id + -uint32_t max_doc_id + +add_doc(uint32_t) + +get_docs() + +get_range() + } + + class InvertedIndex { + -vector~Block~ blocks + -unordered_map~string, vector~uint32_t~~ term_to_blocks + -size_t block_size + +add_document(uint32_t, vector~string~) + +search(vector~string~) + +merge_blocks() + +clear() + } + + class CompactVector { + -vector~uint64_t~ m_bits + -uint8_t m_width + -uint64_t m_mask + +build(vector~uint64_t~) + +access(uint64_t) + +size() + } + + class BitVector { + -vector~uint64_t~ m_bits + -size_t m_size + +build(bit_vector_builder*) + +size() + +bytes() + +operator[](uint64_t) + 
+get_bits(uint64_t, uint64_t) + } + + class MinHeap { + -vector~T~ m_q + -Comparator m_comparator + +reserve(uint64_t) + +top() + +push(T) + +pop() + +clear() + +empty() + +size() + } + + class Autocomplete { + -Parameters params + -ScoredStringPool string_pool + -CompletionTrie trie + -FCDictionary dictionary + -InvertedIndex index + +build_index(string) + +complete(string) + +search(vector~string~) + } + + class Autocomplete2 { + -Parameters params + -ScoredStringPool string_pool + -CompletionTrie trie + -FCDictionary dictionary + -InvertedIndex index + -CompactVector docid_to_lexid + +build_index(string) + +complete(string) + +search(vector~string~) + } + + class Autocomplete3 { + -Parameters params + -ScoredStringPool string_pool + -CompletionTrie trie + -FCDictionary dictionary + -InvertedIndex index + -MinHeap min_priority_queue + +build_index(string) + +complete(string) + +search(vector~string~) + } + + class Autocomplete4 { + -Parameters params + -ScoredStringPool string_pool + -CompletionTrie trie + -FCDictionary dictionary + -BlockedInvertedIndex index + +build_index(string) + +complete(string) + +search(vector~string~) + } + + %% Relationships + Probe <|-- NopProbe + Probe <|-- TimerProbe + TimerProbe *-- Timer + Autocomplete *-- Parameters + Autocomplete *-- ScoredStringPool + Autocomplete *-- CompletionTrie + Autocomplete *-- FCDictionary + Autocomplete *-- InvertedIndex + CompletionTrie *-- TrieNode + InvertedIndex *-- Block + ScoredStringPool *-- ScoredByteRange + Autocomplete2 --|> Autocomplete + Autocomplete3 --|> Autocomplete + Autocomplete4 --|> Autocomplete + Autocomplete3 *-- MinHeap + Autocomplete2 *-- CompactVector + Autocomplete4 *-- BlockedInvertedIndex +``` + +## Component Dependencies + +```mermaid +graph TD + subgraph Core + Parameters + Probe + Timer + end + + subgraph Data Structures + ScoredStringPool + CompletionTrie + FCDictionary + IntegerFCDictionary + InvertedIndex + BlockedInvertedIndex + CompactVector + BitVector + MinHeap + end + + subgraph Implementation + Autocomplete + Autocomplete2 + Autocomplete3 + Autocomplete4 + end + + %% Dependencies + Parameters --> ScoredStringPool + Parameters --> CompletionTrie + Parameters --> FCDictionary + Parameters --> InvertedIndex + Parameters --> IntegerFCDictionary + + ScoredStringPool --> Autocomplete + CompletionTrie --> Autocomplete + FCDictionary --> Autocomplete + InvertedIndex --> Autocomplete + IntegerFCDictionary --> Autocomplete2 + CompactVector --> Autocomplete2 + MinHeap --> Autocomplete3 + BlockedInvertedIndex --> Autocomplete4 + + style Core fill:#f9f,stroke:#333,stroke-width:2px + style Data Structures fill:#9f9,stroke:#333,stroke-width:2px + style Implementation fill:#99f,stroke:#333,stroke-width:2px +``` + +## Memory Layout + +```mermaid +graph TD + subgraph Memory Organization + direction TB + Stack[Stack Memory] --> Heap[Heap Memory] + Heap --> Data[Data Structures] + Data --> Strings[String Pool] + Data --> Trie[Trie Nodes] + Data --> Dict[Dictionary] + Data --> Index[Inverted Index] + Data --> Compact[Compact Vectors] + Data --> BitVec[Bit Vectors] + end + + style Memory Organization fill:#f9f,stroke:#333,stroke-width:2px +``` + +## Key Features and Methods + +### Core Components +- **Parameters**: Configuration management +- **Probe**: Performance measurement interface +- **Timer**: Time tracking implementation + +### Data Structures +- **ScoredStringPool**: String and score management +- **CompletionTrie**: Prefix-based completion +- **FCDictionary**: String compression +- 
**IntegerFCDictionary**: Integer-based dictionary +- **InvertedIndex**: Term-based search +- **BlockedInvertedIndex**: Blocked term-based search +- **CompactVector**: Space-efficient vector +- **BitVector**: Bit-level operations +- **MinHeap**: Priority queue implementation + +### Main Implementation +- **Autocomplete**: Base implementation +- **Autocomplete2**: Integer-based optimization +- **Autocomplete3**: Min-heap based optimization +- **Autocomplete4**: Blocked index optimization + +## Usage Example + +```cpp +// Initialize components +Parameters params; +params.load("config.stats"); + +ScoredStringPool pool(POOL_SIZE); +CompletionTrie trie; +FCDictionary dict; +InvertedIndex index; + +// Build autocomplete system +Autocomplete ac(params, pool, trie, dict, index); +ac.build_index("data.txt"); + +// Use the system +auto completions = ac.complete("hello"); +auto results = ac.search({"hello", "world"}); +``` \ No newline at end of file diff --git a/doc/component_diagram.md b/doc/component_diagram.md new file mode 100644 index 0000000..5c9fd83 --- /dev/null +++ b/doc/component_diagram.md @@ -0,0 +1,45 @@ +# Component Relationships + +```mermaid +graph TD + subgraph Core + Constants[Constants] + Parameters[Parameters] + Probe[Performance Probe] + end + + subgraph Data Structures + StringPool[String Pool] + Trie[Completion Trie] + Dictionary[Front-Coded Dictionary] + Index[Blocked Inverted Index] + end + + subgraph Pipeline + Input[Input Processing] + Build[Index Building] + Query[Query Processing] + end + + %% Core Dependencies + Constants --> Parameters + Parameters --> StringPool + Parameters --> Trie + Parameters --> Dictionary + Parameters --> Index + Probe --> Query + + %% Data Structure Dependencies + Dictionary --> Trie + Trie --> Index + StringPool --> Dictionary + StringPool --> Trie + StringPool --> Index + + %% Pipeline Dependencies + Input --> Build + Build --> Query + Query --> Trie + Query --> Index + Query --> Dictionary +``` \ No newline at end of file diff --git a/doc/cpp_structure.md b/doc/cpp_structure.md new file mode 100644 index 0000000..bfa42d0 --- /dev/null +++ b/doc/cpp_structure.md @@ -0,0 +1,153 @@ +# C++ Code Structure Documentation + +This document outlines the structure of the original C++ implementation that is being ported to Rust. + +## Core Components + +### 1. Constants and Configuration +- **File**: `constants.hpp` +- **Purpose**: Defines system-wide constants and limits +- **Key Constants**: + - `MAX_K`: Maximum number of completions + - `MAX_NUM_TERMS_PER_QUERY`: Maximum terms per query + - `MAX_NUM_CHARS_PER_QUERY`: Maximum characters per query + - `POOL_SIZE`: Size of the string pool + +### 2. Parameters Management +- **File**: `parameters.hpp` +- **Purpose**: Manages system configuration parameters +- **Key Struct**: `parameters` + - `num_terms`: Total number of terms + - `max_string_length`: Maximum string length + - `num_completions`: Number of completions + - `universe`: Size of the universe + - `num_levels`: Number of levels in the index + - `nodes_per_level`: Vector of nodes per level + - `collection_basename`: Base name for collection files + +### 3. Performance Measurement +- **File**: `probe.hpp` +- **Purpose**: Performance measurement and timing +- **Key Structs**: + - `nop_probe`: No-operation probe + - `timer_probe`: Timer-based performance measurement + +### 4. 
String Pool Management +- **File**: `scored_string_pool.hpp` +- **Purpose**: Manages a pool of scored strings +- **Key Components**: + - String storage + - Score management + - Pool operations + +### 5. Completion Trie +- **File**: `completion_trie.hpp` +- **Purpose**: Implements the completion trie data structure +- **Key Features**: + - Prefix-based completion + - Node management + - Traversal operations + +### 6. Blocked Inverted Index +- **File**: `blocked_inverted_index.hpp` +- **Purpose**: Implements blocked inverted indexing +- **Key Components**: + - Block management + - Index operations + - Query processing + +### 7. Front-Coded Dictionary +- **File**: `fc_dictionary.hpp` +- **Purpose**: Implements front-coding for dictionary compression +- **Key Features**: + - String compression + - Dictionary operations + - Lookup functionality + +## Data Pipeline + +1. **Input Processing** + - Read input completions + - Sort lexicographically + - Generate statistics + +2. **Index Building** + - Build front-coded dictionary + - Construct completion trie + - Create blocked inverted index + +3. **Query Processing** + - Parse input query + - Traverse completion trie + - Search inverted index + - Return top-k completions + +## Key Methods and Operations + +### Dictionary Operations +```cpp +// Front-coded dictionary +void build_dictionary(); +void compress_strings(); +std::string lookup(uint32_t id); +``` + +### Trie Operations +```cpp +// Completion trie +void insert(const std::string& completion); +std::vector complete(const std::string& prefix); +``` + +### Index Operations +```cpp +// Blocked inverted index +void build_index(); +std::vector search(const std::vector& terms); +``` + +### Query Processing +```cpp +// Query handling +std::vector process_query(const std::string& query); +void rank_completions(std::vector& completions); +``` + +## Dependencies and Relationships + +1. **Core Dependencies** + - Constants → Parameters + - Parameters → All major components + - Probe → Performance measurement + +2. **Data Structure Dependencies** + - Front-coded Dictionary → Completion Trie + - Completion Trie → Blocked Inverted Index + - All components → String Pool + +3. **Pipeline Dependencies** + - Input Processing → Index Building + - Index Building → Query Processing + - Query Processing → All components + +## Porting Strategy + +1. **Phase 1: Core Components** + - Constants and configuration + - Parameters management + - Performance measurement + +2. **Phase 2: Data Structures** + - String pool + - Completion trie + - Front-coded dictionary + +3. **Phase 3: Index and Query** + - Blocked inverted index + - Query processing + - Pipeline integration + +4. **Phase 4: Testing and Optimization** + - Unit tests + - Integration tests + - Performance optimization \ No newline at end of file diff --git a/doc/data_structures.md b/doc/data_structures.md new file mode 100644 index 0000000..9da8761 --- /dev/null +++ b/doc/data_structures.md @@ -0,0 +1,253 @@ +# Data Structures Documentation + +This document details the key data structures used in the autocomplete system. + +## 1. Scored String Pool + +### Purpose +Manages a fixed-size pool of strings with associated scores, optimized for fast retrieval and updates. 
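As a reading aid before the C++ layout below, here is a minimal Rust sketch of the same idea: parallel string/score vectors with a fixed capacity, evicting the lowest score when full. All names are illustrative assumptions for this document, not the ported API.

```rust
/// Minimal sketch of a fixed-capacity scored string pool:
/// strings and scores live in parallel vectors, mirroring the
/// separate-storage layout described in this section.
pub struct ScoredStringPool {
    strings: Vec<String>,
    scores: Vec<f32>,
    capacity: usize, // assumed > 0
}

impl ScoredStringPool {
    pub fn new(capacity: usize) -> Self {
        Self {
            strings: Vec::with_capacity(capacity),
            scores: Vec::with_capacity(capacity),
            capacity,
        }
    }

    /// Insert a scored string; when the pool is full, replace the
    /// lowest-scored entry only if the new score beats it.
    pub fn insert(&mut self, s: String, score: f32) {
        if self.strings.len() < self.capacity {
            self.strings.push(s);
            self.scores.push(score);
            return;
        }
        let (min_idx, &min_score) = self
            .scores
            .iter()
            .enumerate()
            .min_by(|a, b| a.1.partial_cmp(b.1).unwrap())
            .expect("capacity > 0");
        if score > min_score {
            self.strings[min_idx] = s;
            self.scores[min_idx] = score;
        }
    }

    /// Up to k (string, score) pairs, best score first.
    pub fn top_k(&self, k: usize) -> Vec<(&str, f32)> {
        let mut order: Vec<usize> = (0..self.strings.len()).collect();
        order.sort_by(|&a, &b| self.scores[b].partial_cmp(&self.scores[a]).unwrap());
        order
            .into_iter()
            .take(k)
            .map(|i| (self.strings[i].as_str(), self.scores[i]))
            .collect()
    }
}
```

A bounded min-heap keyed on score would avoid the linear eviction scan; the sketch trades speed for clarity.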
+ +### Structure +```cpp +struct scored_string_pool { + std::vector strings; // String storage + std::vector scores; // Associated scores + size_t size; // Current pool size + size_t capacity; // Maximum capacity +}; +``` + +### Visualization +```mermaid +graph TD + subgraph String Pool + direction LR + S1[String 1] --> SC1[Score 0.8] + S2[String 2] --> SC2[Score 0.6] + S3[String 3] --> SC3[Score 0.9] + S4[String 4] --> SC4[Score 0.7] + end + style String Pool fill:#f9f,stroke:#333,stroke-width:2px +``` + +### Key Operations +- `insert(string, score)`: Add a new string with its score +- `get_score(index)`: Retrieve score for a string +- `get_string(index)`: Retrieve string by index +- `update_score(index, score)`: Update score for a string +- `clear()`: Reset the pool + +### Memory Management +- Fixed-size allocation to prevent reallocations +- Contiguous memory layout for cache efficiency +- Score and string data stored separately for better cache utilization + +## 2. Completion Trie + +### Purpose +Efficient prefix-based string completion using a trie data structure. + +### Structure +```cpp +struct trie_node { + std::unordered_map children; + bool is_terminal; + std::vector completion_ids; +}; + +struct completion_trie { + trie_node* root; + size_t num_nodes; + size_t num_completions; +}; +``` + +### Visualization +```mermaid +graph TD + Root((Root)) --> H((h)) + H --> HE((e)) + HE --> HEL((l)) + HEL --> HELL((l)) + HELL --> HELLO((o)) + HELLO --> HELLOW((w)) + HELLOW --> HELLOWO((o)) + HELLOWO --> HELLOWOR((r)) + HELLOWOR --> HELLOWORL((l)) + HELLOWORL --> HELLOWORLD((d)) + + style Root fill:#f9f,stroke:#333,stroke-width:2px + style HELLOWORLD fill:#9f9,stroke:#333,stroke-width:2px +``` + +### Key Operations +- `insert(completion)`: Add a new completion string +- `complete(prefix)`: Find all completions for a prefix +- `remove(completion)`: Remove a completion string +- `clear()`: Reset the trie + +### Optimizations +- Path compression for common prefixes +- Node sharing for similar completions +- Lazy deletion for better performance + +## 3. Front-Coded Dictionary + +### Purpose +Compressed string dictionary using front-coding technique. + +### Structure +```cpp +struct fc_dictionary { + std::vector data; // Compressed string data + std::vector offsets; // String offsets + size_t num_strings; // Number of strings + size_t total_size; // Total compressed size +}; +``` + +### Visualization +```mermaid +graph LR + subgraph Front-Coded Dictionary + direction LR + S1[hello] --> |shared prefix| S2[helloworld] + S2 --> |shared prefix| S3[hellothere] + S3 --> |shared prefix| S4[hellokitty] + end + style Front-Coded Dictionary fill:#f9f,stroke:#333,stroke-width:2px +``` + +### Key Operations +- `build(strings)`: Build dictionary from string list +- `lookup(id)`: Retrieve string by ID +- `compress()`: Apply front-coding compression +- `decompress(id)`: Decompress a specific string + +### Compression Details +- Common prefixes shared between consecutive strings +- Variable-length encoding for shared prefix lengths +- Delta encoding for string differences + +## 4. Blocked Inverted Index + +### Purpose +Efficient term-based search using blocked inverted indexing. 
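Before the C++ structure below, a minimal Rust sketch of the blocked layout: doc IDs are split into fixed-size sorted blocks, and each block's first/last entries act as min/max bounds so the intersection can skip whole blocks. Type and method names are assumptions made for illustration, not the ported API.

```rust
use std::collections::HashMap;

/// One fixed-size block of a posting list; doc IDs stay sorted, so the
/// first/last entries serve as the block's min/max doc-ID bounds.
struct Block {
    doc_ids: Vec<u32>,
}

impl Block {
    fn contains(&self, doc_id: u32) -> bool {
        match (self.doc_ids.first(), self.doc_ids.last()) {
            // skip the binary search when doc_id falls outside [min, max]
            (Some(&min), Some(&max)) if doc_id >= min && doc_id <= max => {
                self.doc_ids.binary_search(&doc_id).is_ok()
            }
            _ => false,
        }
    }
}

/// Sketch of a blocked inverted index: each term maps to a block list.
struct InvertedIndex {
    term_to_postings: HashMap<String, Vec<Block>>,
    block_size: usize,
}

impl InvertedIndex {
    fn new(block_size: usize) -> Self {
        Self { term_to_postings: HashMap::new(), block_size }
    }

    /// Documents must arrive in increasing doc_id order so blocks stay sorted.
    fn add_document(&mut self, doc_id: u32, terms: &[&str]) {
        for &t in terms {
            let blocks = self.term_to_postings.entry(t.to_string()).or_default();
            match blocks.last_mut() {
                Some(b) if b.doc_ids.len() < self.block_size => b.doc_ids.push(doc_id),
                _ => blocks.push(Block { doc_ids: vec![doc_id] }),
            }
        }
    }

    /// Intersect all terms' postings, driving with the shortest list.
    fn search(&self, terms: &[&str]) -> Vec<u32> {
        let mut lists = Vec::with_capacity(terms.len());
        for &t in terms {
            match self.term_to_postings.get(t) {
                Some(blocks) => lists.push(blocks),
                None => return Vec::new(), // a missing term empties the result
            }
        }
        if lists.is_empty() {
            return Vec::new();
        }
        // drive with the term that has the fewest postings
        lists.sort_by_key(|bs| bs.iter().map(|b| b.doc_ids.len()).sum::<usize>());
        let (driver, rest) = lists.split_first().expect("checked non-empty");
        driver
            .iter()
            .flat_map(|b| b.doc_ids.iter().copied())
            .filter(|&d| rest.iter().all(|bs| bs.iter().any(|b| b.contains(d))))
            .collect()
    }
}
```

A real implementation would keep explicit min/max headers per block and compress block contents; the linear scan over blocks here stands in for the skip structure.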
+ +### Structure +```cpp +struct block { + std::vector doc_ids; // Document IDs in block + uint32_t min_doc_id; // Minimum doc ID in block + uint32_t max_doc_id; // Maximum doc ID in block +}; + +struct inverted_index { + std::vector blocks; // Index blocks + std::unordered_map> term_to_blocks; + size_t block_size; // Size of each block +}; +``` + +### Visualization +```mermaid +graph TD + subgraph Inverted Index + direction TB + T1[Term 1] --> B1[Block 1] + T1 --> B2[Block 2] + T2[Term 2] --> B2 + T2 --> B3[Block 3] + T3[Term 3] --> B1 + T3 --> B3 + + subgraph Block 1 + D1[Doc 1] + D2[Doc 2] + D3[Doc 3] + end + + subgraph Block 2 + D4[Doc 4] + D5[Doc 5] + end + + subgraph Block 3 + D6[Doc 6] + D7[Doc 7] + end + end + style Inverted Index fill:#f9f,stroke:#333,stroke-width:2px +``` + +### Key Operations +- `add_document(doc_id, terms)`: Add document to index +- `search(terms)`: Find documents containing terms +- `merge_blocks()`: Optimize block structure +- `clear()`: Reset the index + +### Blocking Strategy +- Fixed-size blocks for predictable memory usage +- Block-level compression for space efficiency +- Skip pointers for faster traversal + +## Memory and Performance Considerations + +### Memory Layout +1. **Contiguous Storage** + - Strings stored in contiguous memory + - Scores aligned for SIMD operations + - Block data packed efficiently + +2. **Cache Optimization** + - Hot data kept together + - Cold data separated + - Alignment for cache lines + +### Performance Optimizations +1. **String Operations** + - String interning for deduplication + - Small string optimization + - Custom string comparison + +2. **Search Optimizations** + - Block-level skipping + - Term frequency caching + - Result set intersection optimization + +3. **Memory Management** + - Custom allocators for specific structures + - Memory pooling for frequent allocations + - Lazy initialization where appropriate + +## Usage Examples + +### String Pool Usage +```cpp +scored_string_pool pool(POOL_SIZE); +pool.insert("completion1", 0.8); +pool.insert("completion2", 0.6); +auto completions = pool.get_top_k(10); +``` + +### Trie Usage +```cpp +completion_trie trie; +trie.insert("hello world"); +trie.insert("hello there"); +auto results = trie.complete("hello"); +``` + +### Dictionary Usage +```cpp +fc_dictionary dict; +dict.build(strings); +auto str = dict.lookup(42); +``` + +### Index Usage +```cpp +inverted_index index; +index.add_document(1, {"term1", "term2"}); +auto docs = index.search({"term1", "term2"}); +``` \ No newline at end of file diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt deleted file mode 100644 index d4722aa..0000000 --- a/external/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -include_directories(essentials/include) \ No newline at end of file diff --git a/external/essentials b/external/essentials deleted file mode 160000 index 3721ea2..0000000 --- a/external/essentials +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3721ea2b02c24005088cb9efeb89b4090753bbf2 diff --git a/external/jQuery-Autocomplete b/external/jQuery-Autocomplete deleted file mode 160000 index 0ba2565..0000000 --- a/external/jQuery-Autocomplete +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0ba256501bc365814f43066999f51f0619e739a9 diff --git a/external/mongoose b/external/mongoose deleted file mode 160000 index c41a221..0000000 --- a/external/mongoose +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c41a22195ceabc02ffd0379f0e71d6c3575337aa diff --git a/include/autocomplete.hpp b/include/autocomplete.hpp deleted file mode 100644 
index 9f01ed0..0000000 --- a/include/autocomplete.hpp +++ /dev/null @@ -1,343 +0,0 @@ -#pragma once - -#include "util_types.hpp" -#include "autocomplete_common.hpp" -#include "scored_string_pool.hpp" -#include "constants.hpp" - -namespace autocomplete { - -template -struct autocomplete { - typedef scored_string_pool::iterator iterator_type; - - autocomplete() { - m_pool.resize(constants::POOL_SIZE, constants::MAX_K); - } - - autocomplete(parameters const& params) - : autocomplete() { - typename Completions::builder cm_builder(params); - typename Dictionary::builder di_builder(params); - typename InvertedIndex::builder ii_builder(params); - typename ForwardIndex::builder fi_builder(params); - - m_unsorted_docs_list.build(cm_builder.doc_ids()); - m_unsorted_minimal_docs_list.build(ii_builder.minimal_doc_ids()); - - cm_builder.build(m_completions); - di_builder.build(m_dictionary); - ii_builder.build(m_inverted_index); - fi_builder.build(m_forward_index); - } - - iterator_type prefix_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - // NOTE: because the completion_trie works with 1-based ids - // (id 0 is reserved for null terminator) - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - return extract_strings(num_completions); - } - - iterator_type conjunctive_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - - uint32_t num_completions = 0; - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - if (num_terms == 1) { // special case - suffix_lex_range.end += 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - - if (num_completions < k) { - if (num_terms == 1) { // special case - suffix_lex_range.begin -= 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, 
m_pool.scores(), - true // must return unique results - ); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - } - - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k, - std::vector& timers) { - assert(k <= constants::MAX_K); - - // step 1: parsing - timers[0].start(); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - timers[0].stop(); - - // step 2: prefix search - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - timers[1].stop(); - - // step 3: conjunctive search - timers[2].start(); - if (num_completions < k) { - if (num_terms == 1) { // special case - suffix_lex_range.begin -= 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - } - timers[2].stop(); - - // step 4: reporting - timers[3].start(); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type prefix_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - timers[2].stop(); - - // step 3 - timers[3].start(); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type conjunctive_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - timers[0].stop(); - - uint32_t num_completions = 0; - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - timers[1].stop(); - - // step 2 - 
timers[2].start(); - if (num_terms == 1) { // special case - - suffix_lex_range.end += 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - timers[2].stop(); - - // step 3 - timers[3].start(); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - size_t bytes() const { - return m_completions.bytes() + m_unsorted_docs_list.bytes() + - m_unsorted_minimal_docs_list.bytes() + m_dictionary.bytes() + - m_inverted_index.bytes() + m_forward_index.bytes(); - } - - void print_stats() const; - - template - void visit(Visitor& visitor) { - visitor.visit(m_completions); - visitor.visit(m_unsorted_docs_list); - visitor.visit(m_unsorted_minimal_docs_list); - visitor.visit(m_dictionary); - visitor.visit(m_inverted_index); - visitor.visit(m_forward_index); - } - -private: - Completions m_completions; - UnsortedDocsList m_unsorted_docs_list; - UnsortedDocsList m_unsorted_minimal_docs_list; - Dictionary m_dictionary; - InvertedIndex m_inverted_index; - ForwardIndex m_forward_index; - - scored_string_pool m_pool; - - void init() { - m_pool.clear(); - m_pool.init(); - assert(m_pool.size() == 0); - } - - template - uint32_t conjunctive_topk(Iterator& it, const range r, uint32_t const k) { - auto& topk_scores = m_pool.scores(); - uint32_t results = 0; - for (; it.has_next(); ++it) { - auto doc_id = *it; - if (m_forward_index.intersects(doc_id, r)) { - topk_scores[results++] = doc_id; - if (results == k) break; - } - } - return results; - } - - iterator_type extract_strings(const uint32_t num_completions) { - auto const& topk_scores = m_pool.scores(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto doc_id = topk_scores[i]; - auto it = m_forward_index.iterator(doc_id); - uint64_t offset = m_pool.bytes(); - uint8_t* decoded = m_pool.data() + offset; - for (uint32_t j = 0; j != it.size(); ++j, ++it) { - auto term_id = *it; - uint8_t len = m_dictionary.extract(term_id, decoded); - decoded += len; - offset += len; - if (j != it.size() - 1) { - *decoded++ = ' '; - offset++; - } - } - m_pool.push_back_offset(offset); - } - assert(m_pool.size() == num_completions); - return m_pool.begin(); - } -}; -} // namespace autocomplete \ No newline at end of file diff --git a/include/autocomplete2.hpp b/include/autocomplete2.hpp deleted file mode 100644 index 3003c02..0000000 --- a/include/autocomplete2.hpp +++ /dev/null @@ -1,400 +0,0 @@ -#pragma once - -#include "util_types.hpp" -#include "building_util.hpp" -#include "compact_vector.hpp" -#include "autocomplete_common.hpp" -#include "scored_string_pool.hpp" -#include "constants.hpp" - -namespace autocomplete { - -template -struct autocomplete2 { - typedef scored_string_pool::iterator iterator_type; - - autocomplete2() { - m_pool.resize(constants::POOL_SIZE, constants::MAX_K); - m_topk_completion_set.resize(constants::MAX_K, - 2 * constants::MAX_NUM_TERMS_PER_QUERY); - } - - autocomplete2(parameters const& params) - : autocomplete2() { - typename Completions::builder cm_builder(params); - typename Dictionary::builder di_builder(params); - typename InvertedIndex::builder ii_builder(params); - - auto const& doc_ids 
= cm_builder.doc_ids(); - m_unsorted_docs_list.build(doc_ids); - m_unsorted_minimal_docs_list.build(ii_builder.minimal_doc_ids()); - - { - essentials::logger("building map from doc_id to lex_id..."); - uint64_t n = doc_ids.size(); - typedef std::vector> id_map_type; - id_map_type ids; - ids.reserve(n); - for (id_type lex_id = 0; lex_id != n; ++lex_id) { - ids.emplace_back(lex_id, doc_ids[lex_id]); - } - std::sort(ids.begin(), ids.end(), [](auto const& l, auto const& r) { - return l.second < r.second; - }); - m_docid_to_lexid.build( - util::first_iterator( - ids.begin()), - ids.size()); - essentials::logger("DONE"); - } - - cm_builder.build(m_completions); - di_builder.build(m_dictionary); - ii_builder.build(m_inverted_index); - } - - iterator_type prefix_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type conjunctive_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = 0; - - if (num_terms == 1) { // special case - suffix_lex_range.end += 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - extract_completions(num_completions); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - - if (num_completions < k) { - if (num_terms == 1) { // special case - suffix_lex_range.begin -= 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - extract_completions(num_completions); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = 
conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - } else { - extract_completions(num_completions); - } - - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k, - std::vector& timers) { - assert(k <= constants::MAX_K); - - timers[0].start(); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - timers[0].stop(); - - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - timers[1].stop(); - - timers[2].start(); - if (num_completions < k) { - if (num_terms == 1) { // special case - suffix_lex_range.begin -= 1; - num_completions = m_unsorted_minimal_docs_list.topk( - suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - extract_completions(num_completions); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - } else { - extract_completions(num_completions); - } - timers[2].stop(); - - timers[3].start(); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type prefix_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - timers[2].stop(); - - // step 3 - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type conjunctive_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - timers[0].stop(); - - uint32_t num_completions = 0; - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - if (num_terms == 1) { // special case - suffix_lex_range.end += 1; - num_completions = m_unsorted_minimal_docs_list.topk( - 
suffix_lex_range, k, m_pool.scores(), - true // must return unique results - ); - extract_completions(num_completions); - } else { - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - timers[2].stop(); - - // step 3 - timers[3].start(); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - size_t bytes() const { - return m_completions.bytes() + m_unsorted_docs_list.bytes() + - m_unsorted_minimal_docs_list.bytes() + m_dictionary.bytes() + - m_docid_to_lexid.bytes() + m_inverted_index.bytes(); - } - - void print_stats() const; - - template - void visit(Visitor& visitor) { - visitor.visit(m_completions); - visitor.visit(m_unsorted_docs_list); - visitor.visit(m_unsorted_minimal_docs_list); - visitor.visit(m_dictionary); - visitor.visit(m_inverted_index); - visitor.visit(m_docid_to_lexid); - } - -private: - Completions m_completions; - UnsortedDocsList m_unsorted_docs_list; - UnsortedDocsList m_unsorted_minimal_docs_list; - Dictionary m_dictionary; - InvertedIndex m_inverted_index; - compact_vector m_docid_to_lexid; - - scored_string_pool m_pool; - completion_set m_topk_completion_set; - - void init() { - m_pool.clear(); - m_pool.init(); - assert(m_pool.size() == 0); - } - - // NOTE: this can be done more efficienctly exploiting - // the fact that the strings to be extracted share a common - // prefix, thus this task should be delegated to the - // integer_fc_dictionary... (enchance the locality of the operation) - // NOTE: this only work when used during the prefix_topk step. 
- void extract_completions(const uint32_t num_completions) { - auto const& topk_scores = m_pool.scores(); - auto& completions = m_topk_completion_set.completions(); - auto& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto doc_id = topk_scores[i]; - auto lex_id = m_docid_to_lexid[doc_id]; - uint8_t size = m_completions.extract(lex_id, completions[i]); - sizes[i] = size; - } - } - - template - uint32_t conjunctive_topk(Iterator& it, const range r, const uint32_t k) { - auto& topk_scores = m_pool.scores(); - auto& completions = m_topk_completion_set.completions(); - auto& sizes = m_topk_completion_set.sizes(); - uint32_t i = 0; - - for (; it.has_next(); ++it) { - auto doc_id = *it; - auto lex_id = m_docid_to_lexid[doc_id]; - uint32_t size = m_completions.extract(lex_id, completions[i]); - - bool found = false; - for (uint32_t j = 0; j != size and !found; ++j) { - if (r.contains(completions[i][j])) found = true; - } - - if (found) { - topk_scores[i] = doc_id; - sizes[i] = size; - ++i; - if (i == k) break; - } - } - - return i; - } - - iterator_type extract_strings(const uint32_t num_completions) { - auto const& completions = m_topk_completion_set.completions(); - auto const& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto const& c = completions[i]; - uint32_t size = sizes[i]; - uint64_t offset = m_pool.bytes(); - uint8_t* decoded = m_pool.data() + offset; - for (uint32_t j = 0; j != size; ++j) { - auto term_id = c[j]; - uint8_t len = m_dictionary.extract(term_id, decoded); - decoded += len; - offset += len; - if (j != size - 1) { - *decoded++ = ' '; - offset++; - } - } - m_pool.push_back_offset(offset); - } - assert(m_pool.size() == num_completions); - return m_pool.begin(); - } -}; -} // namespace autocomplete \ No newline at end of file diff --git a/include/autocomplete3.hpp b/include/autocomplete3.hpp deleted file mode 100644 index 550aac5..0000000 --- a/include/autocomplete3.hpp +++ /dev/null @@ -1,388 +0,0 @@ -#pragma once - -#include "util_types.hpp" -#include "building_util.hpp" -#include "compact_vector.hpp" -#include "autocomplete_common.hpp" -#include "scored_string_pool.hpp" -#include "min_heap.hpp" -#include "constants.hpp" - -namespace autocomplete { - -/* -During the conjunctive step, maintain a min-heap of iterators, -one iterator for each termID in the lexicographic range of the -last token of the query. 
-*/ - -template -struct autocomplete3 { - typedef scored_string_pool::iterator iterator_type; - typedef min_heap> - min_priority_queue_type; - - autocomplete3() { - m_pool.resize(constants::POOL_SIZE, constants::MAX_K); - m_topk_completion_set.resize(constants::MAX_K, - 2 * constants::MAX_NUM_TERMS_PER_QUERY); - } - - autocomplete3(parameters const& params) - : autocomplete3() { - typename Completions::builder cm_builder(params); - typename Dictionary::builder di_builder(params); - typename InvertedIndex::builder ii_builder(params); - - auto const& doc_ids = cm_builder.doc_ids(); - m_unsorted_docs_list.build(doc_ids); - - { - essentials::logger("building map from doc_id to lex_id..."); - uint64_t n = doc_ids.size(); - typedef std::vector> id_map_type; - id_map_type ids; - ids.reserve(n); - for (id_type lex_id = 0; lex_id != n; ++lex_id) { - ids.emplace_back(lex_id, doc_ids[lex_id]); - } - std::sort(ids.begin(), ids.end(), [](auto const& l, auto const& r) { - return l.second < r.second; - }); - m_docid_to_lexid.build( - util::first_iterator( - ids.begin()), - ids.size()); - essentials::logger("DONE"); - } - - cm_builder.build(m_completions); - di_builder.build(m_dictionary); - ii_builder.build(m_inverted_index); - } - - iterator_type prefix_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type conjunctive_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - uint32_t num_completions = 0; - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - - if (num_completions < k) { - if (num_terms == 1) { // we've got nothing to intersect - iterator it(0, m_inverted_index.num_docs()); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else if 
(prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k, - std::vector& timers) { - assert(k <= constants::MAX_K); - - timers[0].start(); - init(); - completion_type prefix; - byte_range suffix; - uint32_t num_terms = parse(m_dictionary, query, prefix, suffix); - assert(num_terms > 0); - timers[0].stop(); - - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - timers[1].stop(); - - timers[2].start(); - if (num_completions < k) { - if (num_terms == 1) { // we've got nothing to intersect - iterator it(0, m_inverted_index.num_docs()); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - } - timers[2].stop(); - - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type prefix_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - timers[2].stop(); - - // step 3 - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type conjunctive_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - uint32_t num_completions = 0; - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - if (prefix.size() == 1) { // we've got nothing to intersect - auto it = m_inverted_index.iterator(prefix.front() - 1); - num_completions = 
conjunctive_topk(it, suffix_lex_range, k); - } else { - auto it = m_inverted_index.intersection_iterator(prefix); - num_completions = conjunctive_topk(it, suffix_lex_range, k); - } - timers[2].stop(); - - // step 3 - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - size_t bytes() const { - return m_completions.bytes() + m_unsorted_docs_list.bytes() + - m_dictionary.bytes() + m_docid_to_lexid.bytes() + - m_inverted_index.bytes(); - } - - void print_stats() const; - - template <typename Visitor> - void visit(Visitor& visitor) { - visitor.visit(m_completions); - visitor.visit(m_unsorted_docs_list); - visitor.visit(m_dictionary); - visitor.visit(m_inverted_index); - visitor.visit(m_docid_to_lexid); - } - -private: - Completions m_completions; - UnsortedDocsList m_unsorted_docs_list; - Dictionary m_dictionary; - InvertedIndex m_inverted_index; - compact_vector m_docid_to_lexid; - - scored_string_pool m_pool; - completion_set m_topk_completion_set; - - void init() { - m_pool.clear(); - m_pool.init(); - assert(m_pool.size() == 0); - } - - // NOTE: this can be done more efficiently exploiting - // the fact that the strings to be extracted share a common - // prefix, thus this task should be delegated to the - // integer_fc_dictionary... (enhance the locality of the operation) - // NOTE: this only works when used during the prefix_topk step. - void extract_completions(const uint32_t num_completions) { - auto const& topk_scores = m_pool.scores(); - auto& completions = m_topk_completion_set.completions(); - auto& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto doc_id = topk_scores[i]; - auto lex_id = m_docid_to_lexid[doc_id]; - uint8_t size = m_completions.extract(lex_id, completions[i]); - sizes[i] = size; - } - } - - template <typename Iterator> - uint32_t conjunctive_topk(Iterator& it, const range r, const uint32_t k) { - assert(!r.is_invalid()); - - auto& topk_scores = m_pool.scores(); - min_priority_queue_type q; - q.reserve(r.end - r.begin + 1); // inclusive range - assert(r.begin > 0); - for (uint64_t term_id = r.begin; term_id <= r.end; ++term_id) { - q.push_back(m_inverted_index.iterator(term_id - 1)); - } - q.make_heap(); - - uint32_t results = 0; - for (; it.has_next() and !q.empty(); ++it) { - auto doc_id = *it; - - bool found = false; - while (!q.empty() and !found) { - auto& z = q.top(); - auto val = *z; - if (val > doc_id) break; - if (val < doc_id) { - val = z.next_geq(doc_id); - if (!z.has_next()) { - q.pop(); - } else { - q.heapify(); - } - } - if (val == doc_id) found = true; - } - - if (found) { - topk_scores[results++] = doc_id; - if (results == k) break; - } - } - - return results; - } - - iterator_type extract_strings(const uint32_t num_completions) { - auto const& completions = m_topk_completion_set.completions(); - auto const& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto const& c = completions[i]; - uint32_t size = sizes[i]; - uint64_t offset = m_pool.bytes(); - uint8_t* decoded = m_pool.data() + offset; - for (uint32_t j = 0; j != size; ++j) { - auto term_id = c[j]; - uint8_t len = m_dictionary.extract(term_id, decoded); - decoded += len; - offset += len; - if (j != size - 1) { - *decoded++ = ' '; - offset++; - } - } - m_pool.push_back_offset(offset); - } - assert(m_pool.size() == num_completions); - return m_pool.begin(); - } -}; -} // namespace autocomplete \ No newline at end of file diff --git
a/include/autocomplete4.hpp b/include/autocomplete4.hpp deleted file mode 100644 index 8b3d882..0000000 --- a/include/autocomplete4.hpp +++ /dev/null @@ -1,322 +0,0 @@ -#pragma once - -#include "util_types.hpp" -#include "building_util.hpp" -#include "compact_vector.hpp" -#include "autocomplete_common.hpp" -#include "scored_string_pool.hpp" -#include "min_heap.hpp" -#include "constants.hpp" - -namespace autocomplete { - -/* Bast and Weber approach. */ - -template -struct autocomplete4 { - typedef scored_string_pool::iterator iterator_type; - - autocomplete4() { - m_pool.resize(constants::POOL_SIZE, constants::MAX_K); - m_topk_completion_set.resize(constants::MAX_K, - 2 * constants::MAX_NUM_TERMS_PER_QUERY); - } - - autocomplete4(parameters const& params, float c) - : autocomplete4() { - typename Completions::builder cm_builder(params); - typename Dictionary::builder di_builder(params); - typename BlockedInvertedIndex::builder ii_builder(params, c); - - auto const& doc_ids = cm_builder.doc_ids(); - m_unsorted_docs_list.build(doc_ids); - - { - essentials::logger("building map from doc_id to lex_id..."); - uint64_t n = doc_ids.size(); - typedef std::vector> id_map_type; - id_map_type ids; - ids.reserve(n); - for (id_type lex_id = 0; lex_id != n; ++lex_id) { - ids.emplace_back(lex_id, doc_ids[lex_id]); - } - std::sort(ids.begin(), ids.end(), [](auto const& l, auto const& r) { - return l.second < r.second; - }); - m_docid_to_lexid.build( - util::first_iterator( - ids.begin()), - ids.size()); - essentials::logger("DONE"); - } - - cm_builder.build(m_completions); - di_builder.build(m_dictionary); - ii_builder.build(m_inverted_index); - } - - iterator_type prefix_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type conjunctive_topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - uint32_t num_completions = - conjunctive_topk(prefix, suffix_lex_range, k, m_pool.scores()); - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k) { - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - - if (num_completions < k) { - num_completions = conjunctive_topk(prefix, suffix_lex_range, 
k); - } - - extract_completions(num_completions); - return extract_strings(num_completions); - } - - iterator_type topk(std::string const& query, const uint32_t k, - std::vector& timers) { - assert(k <= constants::MAX_K); - - timers[0].start(); - init(); - completion_type prefix; - byte_range suffix; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - uint32_t num_completions = 0; - if (!r.is_invalid()) { - num_completions = m_unsorted_docs_list.topk(r, k, m_pool.scores()); - } - timers[1].stop(); - - timers[2].start(); - if (num_completions < k) { - num_completions = conjunctive_topk(prefix, suffix_lex_range, k); - } - timers[2].stop(); - - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type prefix_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - - suffix_lex_range.begin += 1; - suffix_lex_range.end += 1; - range r = m_completions.locate_prefix(prefix, suffix_lex_range); - if (r.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - uint32_t num_completions = - m_unsorted_docs_list.topk(r, k, m_pool.scores()); - timers[2].stop(); - - // step 3 - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - // for benchmarking - iterator_type conjunctive_topk(std::string const& query, uint32_t const k, - std::vector& timers) { - // step 0 - timers[0].start(); - assert(k <= constants::MAX_K); - init(); - completion_type prefix; - byte_range suffix{0, 0}; - parse(m_dictionary, query, prefix, suffix); - timers[0].stop(); - - uint32_t num_completions = 0; - - // step 1 - timers[1].start(); - range suffix_lex_range = m_dictionary.locate_prefix(suffix); - if (suffix_lex_range.is_invalid()) return m_pool.begin(); - timers[1].stop(); - - // step 2 - timers[2].start(); - num_completions = - conjunctive_topk(prefix, suffix_lex_range, k, m_pool.scores()); - timers[2].stop(); - - // step 3 - timers[3].start(); - extract_completions(num_completions); - auto it = extract_strings(num_completions); - timers[3].stop(); - - return it; - } - - size_t bytes() const { - return m_completions.bytes() + m_unsorted_docs_list.bytes() + - m_dictionary.bytes() + m_docid_to_lexid.bytes() + - m_inverted_index.bytes(); - } - - void print_stats() const; - - template - void visit(Visitor& visitor) { - visitor.visit(m_completions); - visitor.visit(m_unsorted_docs_list); - visitor.visit(m_dictionary); - visitor.visit(m_inverted_index); - visitor.visit(m_docid_to_lexid); - } - -private: - Completions m_completions; - UnsortedDocsList m_unsorted_docs_list; - Dictionary m_dictionary; - BlockedInvertedIndex m_inverted_index; - compact_vector m_docid_to_lexid; - - scored_string_pool m_pool; - completion_set 
m_topk_completion_set; - - void init() { - m_pool.clear(); - m_pool.init(); - assert(m_pool.size() == 0); - } - - // NOTE: this can be done more efficiently exploiting - // the fact that the strings to be extracted share a common - // prefix, thus this task should be delegated to the - // integer_fc_dictionary... (enhance the locality of the operation) - // NOTE: this only works when used during the prefix_topk step. - void extract_completions(const uint32_t num_completions) { - auto const& topk_scores = m_pool.scores(); - auto& completions = m_topk_completion_set.completions(); - auto& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto doc_id = topk_scores[i]; - auto lex_id = m_docid_to_lexid[doc_id]; - uint8_t size = m_completions.extract(lex_id, completions[i]); - sizes[i] = size; - } - } - - uint32_t conjunctive_topk(completion_type& prefix, const range suffix, - const uint32_t k) { - auto& topk_scores = m_pool.scores(); - auto it = m_inverted_index.intersection_iterator(prefix, suffix); - uint32_t results = 0; - for (; it.has_next(); ++it) { - auto doc_id = *it; - if (it.intersects()) { - topk_scores[results++] = doc_id; - if (results == k) break; - } - } - return results; - } - - iterator_type extract_strings(const uint32_t num_completions) { - auto const& completions = m_topk_completion_set.completions(); - auto const& sizes = m_topk_completion_set.sizes(); - for (uint32_t i = 0; i != num_completions; ++i) { - auto const& c = completions[i]; - uint32_t size = sizes[i]; - uint64_t offset = m_pool.bytes(); - uint8_t* decoded = m_pool.data() + offset; - for (uint32_t j = 0; j != size; ++j) { - auto term_id = c[j]; - uint8_t len = m_dictionary.extract(term_id, decoded); - decoded += len; - offset += len; - if (j != size - 1) { - *decoded++ = ' '; - offset++; - } - } - m_pool.push_back_offset(offset); - } - assert(m_pool.size() == num_completions); - return m_pool.begin(); - } -}; -} // namespace autocomplete \ No newline at end of file diff --git a/include/autocomplete_common.hpp b/include/autocomplete_common.hpp deleted file mode 100644 index c04f8b6..0000000 --- a/include/autocomplete_common.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "util_types.hpp" - -namespace autocomplete { - -template <typename Dictionary> -uint32_t parse(Dictionary const& dict, std::string const& query, - completion_type& prefix, byte_range& suffix) { - uint32_t num_terms = 1; - byte_range_iterator it(string_to_byte_range(query)); - while (true) { - suffix = it.next(); - if (!it.has_next()) break; - auto term_id = dict.locate(suffix); - prefix.push_back(term_id); - ++num_terms; - } - return num_terms; -} - -} // namespace autocomplete \ No newline at end of file diff --git a/include/building_util.hpp b/include/building_util.hpp deleted file mode 100644 index 17427b6..0000000 --- a/include/building_util.hpp +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once - -#include "bit_vector.hpp" - -namespace autocomplete { -namespace util { - -void push_pad(bit_vector_builder& bvb, uint64_t alignment = 8) { - uint64_t mod = bvb.size() % alignment; - if (mod) { - uint64_t pad = alignment - mod; - bvb.append_bits(0, pad); - assert(bvb.size() % alignment == 0); - } -} - -void eat_pad(bits_iterator& it, uint64_t alignment = 8) { - uint64_t mod = it.position() % alignment; - if (mod) { - uint64_t pad = alignment - mod; - it.get_bits(pad); - assert(it.position() % alignment == 0); - } -} - -template <typename Iterator> -struct first_iterator - : std::iterator { - first_iterator(Iterator it, uint64_t state = 0)
- : m_it(it) - , m_state(state) {} - - typename Iterator::value_type::first_type operator*() { - return (*m_it).first; - } - - first_iterator& operator++() { - m_it += 1; - m_state += 1; - return *this; - } - - first_iterator operator+(uint64_t n) { - return {m_it + n, m_state + n}; - } - - bool operator==(first_iterator const& other) const { - return m_state == other.m_state; - } - - bool operator!=(first_iterator const& other) const { - return !(*this == other); - } - -private: - Iterator m_it; - uint64_t m_state; -}; - -} // namespace util -} // namespace autocomplete \ No newline at end of file diff --git a/include/delta_forward_index.hpp b/include/delta_forward_index.hpp deleted file mode 100644 index 6a302ab..0000000 --- a/include/delta_forward_index.hpp +++ /dev/null @@ -1,149 +0,0 @@ -#pragma once - -#include "parameters.hpp" -#include "bit_vector.hpp" -#include "ef/ef_sequence.hpp" - -namespace autocomplete { - -struct delta_forward_index { - struct builder { - builder() {} - - builder(parameters const& params) - : m_num_integers(0) - , m_num_terms(params.num_terms) { - essentials::logger("building forward_index..."); - uint64_t num_completions = params.num_completions; - std::ifstream input( - (params.collection_basename + ".forward").c_str(), - std::ios_base::in); - m_pointers.push_back(0); - for (uint64_t i = 0; i != num_completions; ++i) { - uint32_t n = 0; - input >> n; - assert(n > 0 and n < constants::MAX_NUM_TERMS_PER_QUERY); - write_gamma_nonzero(m_data, n); - m_num_integers += n; - for (uint64_t k = 0; k != n; ++k) { - id_type x; - input >> x; - write_delta(m_data, x); - } - m_pointers.push_back(m_data.size()); - } - m_pointers.pop_back(); - input.close(); - essentials::logger("DONE"); - } - - void swap(delta_forward_index::builder& other) { - std::swap(other.m_num_integers, m_num_integers); - std::swap(other.m_num_terms, m_num_terms); - other.m_pointers.swap(m_pointers); - other.m_data.swap(m_data); - } - - void build(delta_forward_index& fi) { - fi.m_num_integers = m_num_integers; - fi.m_num_terms = m_num_terms; - fi.m_pointers.build(m_pointers); - fi.m_data.build(&m_data); - builder().swap(*this); - } - - private: - uint64_t m_num_integers; - uint64_t m_num_terms; - std::vector m_pointers; - bit_vector_builder m_data; - }; - - delta_forward_index() {} - - struct forward_list_iterator_type { - forward_list_iterator_type(bits_iterator const& it, - uint64_t n) - : m_it(it) - , m_n(n) - , m_i(0) {} - - uint64_t size() const { - return m_n; - } - - void operator++() { - m_i += 1; - } - - id_type operator*() { - return read_delta(m_it); - } - - bool intersects(const range r) { - for (uint64_t i = 0; i != size(); ++i) { - auto val = operator*(); - if (r.contains(val)) return true; - } - return false; - } - - private: - bits_iterator m_it; - uint64_t m_n; - uint64_t m_i; - }; - - forward_list_iterator_type iterator(id_type doc_id) { - uint64_t offset = m_pointers.access(doc_id); - bits_iterator it(m_data, offset); - uint64_t n = read_gamma_nonzero(it); - return {it, n}; - } - - bool intersects(const id_type doc_id, const range r) { - return iterator(doc_id).intersects(r); - } - - uint64_t num_integers() const { - return m_num_integers; - } - - uint64_t num_terms() const { - return m_num_terms; - } - - uint64_t num_docs() const { - return m_pointers.size(); - } - - size_t data_bytes() const { - return m_data.bytes(); - } - - size_t pointer_bytes() const { - return m_pointers.bytes(); - } - - size_t bytes() const { - return essentials::pod_bytes(m_num_integers) + - 
essentials::pod_bytes(m_num_terms) + m_pointers.bytes() + - m_data.bytes(); - } - - template - void visit(Visitor& visitor) { - visitor.visit(m_num_integers); - visitor.visit(m_num_terms); - visitor.visit(m_pointers); - visitor.visit(m_data); - } - -private: - uint64_t m_num_integers; - uint64_t m_num_terms; - ef::ef_sequence m_pointers; - bit_vector m_data; -}; - -} // namespace autocomplete \ No newline at end of file diff --git a/include/forward_index.hpp b/include/forward_index.hpp deleted file mode 100644 index 51c7c63..0000000 --- a/include/forward_index.hpp +++ /dev/null @@ -1,201 +0,0 @@ -#pragma once - -#include "parameters.hpp" -#include "integer_codes.hpp" -#include "building_util.hpp" -#include "ef/ef_sequence.hpp" - -namespace autocomplete { - -template -struct forward_index { - typedef ListType forward_list_type; - typedef typename forward_list_type::iterator forward_list_iterator_type; - typedef uncompressed_list permutation_list_type; - typedef - typename permutation_list_type::iterator permutation_list_iterator_type; - - struct builder { - builder() {} - - builder(parameters const& params) - : m_num_integers(0) - , m_num_terms(params.num_terms) { - essentials::logger("building forward_index..."); - - uint64_t num_completions = params.num_completions; - - std::ifstream input( - (params.collection_basename + ".forward").c_str(), - std::ios_base::in); - - std::vector list; - std::vector sorted_permutation; - std::vector permutation; - - m_pointers.push_back(0); - - for (uint64_t i = 0; i != num_completions; ++i) { - list.clear(); - sorted_permutation.clear(); - permutation.clear(); - - uint32_t n = 0; - input >> n; - assert(n > 0 and n < constants::MAX_NUM_TERMS_PER_QUERY); - m_num_integers += n; - list.reserve(n); - sorted_permutation.reserve(n); - - for (uint64_t k = 0; k != n; ++k) { - id_type x; - input >> x; - list.push_back(x); - sorted_permutation.push_back(k); - } - - write_gamma_nonzero(m_bvb, n); - if (ListType::is_byte_aligned) util::push_pad(m_bvb); - - std::sort( - sorted_permutation.begin(), sorted_permutation.end(), - [&](id_type l, id_type r) { return list[l] < list[r]; }); - - permutation.resize(n); - for (uint32_t i = 0; i != n; ++i) { - permutation[sorted_permutation[i]] = i; - } - - std::sort(list.begin(), list.end()); - forward_list_type::build(m_bvb, list.begin(), m_num_terms + 1, - n); - util::push_pad(m_bvb); - m_pointers.push_back(m_bvb.size()); - - permutation_list_type::build(m_bvb, permutation.begin(), n + 1, - n); - m_pointers.push_back(m_bvb.size()); - } - - m_pointers.pop_back(); - input.close(); - essentials::logger("DONE"); - } - - void swap(forward_index::builder& other) { - std::swap(other.m_num_integers, m_num_integers); - std::swap(other.m_num_terms, m_num_terms); - other.m_pointers.swap(m_pointers); - other.m_bvb.swap(m_bvb); - } - - void build(forward_index& fi) { - fi.m_num_integers = m_num_integers; - fi.m_num_terms = m_num_terms; - fi.m_pointers.build(m_pointers); - fi.m_data.build(&m_bvb); - builder().swap(*this); - } - - private: - uint64_t m_num_integers; - uint64_t m_num_terms; - std::vector m_pointers; - bit_vector_builder m_bvb; - }; - - forward_index() {} - - bool intersects(id_type doc_id, range r) { - return get(doc_id).intersects(r); - } - - struct permuting_iterator_type { - permuting_iterator_type(forward_list_iterator_type const& sorted, - permutation_list_iterator_type const& permuted) - : m_i(0) - , m_sorted(sorted) - , m_permuted(permuted) { - assert(sorted.size() == permuted.size()); - } - - uint32_t size() const { - 
return m_sorted.size(); - } - - id_type operator*() { - return m_sorted.access(m_permuted.access(m_i)); - } - - void operator++() { - ++m_i; - } - - private: - uint32_t m_i; - forward_list_iterator_type m_sorted; - permutation_list_iterator_type m_permuted; - }; - - permuting_iterator_type iterator(id_type doc_id) { - uint64_t offset = m_pointers.access(doc_id * 2); - bits_iterator it(m_data, offset); - uint64_t n = read_gamma_nonzero(it); - if (ListType::is_byte_aligned) util::eat_pad(it); - forward_list_iterator_type it_sorted(m_data, it.position(), - m_num_terms + 1, n); - offset = m_pointers.access(doc_id * 2 + 1); - permutation_list_iterator_type it_permutation(m_data, offset, n + 1, n); - return permuting_iterator_type(it_sorted, it_permutation); - } - - uint64_t num_integers() const { - return m_num_integers; - } - - uint64_t num_terms() const { - return m_num_terms; - } - - uint64_t num_docs() const { - return m_pointers.size(); - } - - size_t data_bytes() const { - return m_data.bytes(); - } - - size_t pointer_bytes() const { - return m_pointers.bytes(); - } - - size_t bytes() const { - return essentials::pod_bytes(m_num_integers) + - essentials::pod_bytes(m_num_terms) + m_pointers.bytes() + - m_data.bytes(); - } - - template - void visit(Visitor& visitor) { - visitor.visit(m_num_integers); - visitor.visit(m_num_terms); - visitor.visit(m_pointers); - visitor.visit(m_data); - } - -private: - uint64_t m_num_integers; - uint64_t m_num_terms; - ef::ef_sequence m_pointers; - bit_vector m_data; - - forward_list_iterator_type get(id_type doc_id) { - uint64_t offset = m_pointers.access(doc_id * 2); - bits_iterator it(m_data, offset); - uint64_t n = read_gamma_nonzero(it); - if (ListType::is_byte_aligned) util::eat_pad(it); - return {m_data, it.position(), m_num_terms + 1, n}; - } -}; - -} // namespace autocomplete \ No newline at end of file diff --git a/include/types.hpp b/include/types.hpp deleted file mode 100644 index 1083cfc..0000000 --- a/include/types.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "completion_trie.hpp" -#include "fc_dictionary.hpp" -#include "integer_fc_dictionary.hpp" -#include "uint_vec.hpp" -#include "unsorted_list.hpp" -#include "uncompressed_list.hpp" - -#include "forward_index.hpp" -#include "compact_forward_index.hpp" -#include "delta_forward_index.hpp" - -#include "inverted_index.hpp" -#include "blocked_inverted_index.hpp" - -#include "autocomplete.hpp" -#include "autocomplete2.hpp" -#include "autocomplete3.hpp" -#include "autocomplete4.hpp" - -#include "compact_vector.hpp" -#include "ef/ef_sequence.hpp" -#include "ef/compact_ef.hpp" -#include "succinct_rmq/cartesian_tree.hpp" - -namespace autocomplete { - -typedef uint_vec uint32_vec; -typedef uint_vec uint64_vec; - -// typedef completion_trie -// uint64_completion_trie; - -typedef completion_trie - ef_completion_trie; - -typedef fc_dictionary<> fc_dictionary_type; -typedef integer_fc_dictionary<> integer_fc_dictionary_type; - -typedef unsorted_list succinct_rmq; -typedef uncompressed_list uncompressed_list32_t; - -// typedef inverted_index uncompressed_inverted_index; -typedef inverted_index ef_inverted_index; - -// typedef forward_index uncompressed_forward_index; -// typedef forward_index ef_forward_index; - -// typedef blocked_inverted_index -// uncompressed_blocked_inverted_index; -typedef blocked_inverted_index ef_blocked_inverted_index; - -// typedef autocomplete -// uncompressed_autocomplete_type; - -// typedef autocomplete2 -// uncompressed_autocomplete_type2; - -/* compressed 
indexes */ -typedef autocomplete - ef_autocomplete_type1; - -typedef autocomplete2 - ef_autocomplete_type2; - -typedef autocomplete3 - ef_autocomplete_type3; - -typedef autocomplete4 - ef_autocomplete_type4; -} // namespace autocomplete \ No newline at end of file diff --git a/results/README.md b/results/README.md deleted file mode 100644 index 7e6ba77..0000000 --- a/results/README.md +++ /dev/null @@ -1,22 +0,0 @@ -Test machine ------------ - -4 Intel i7-7700 cores (@3.6 GHz); 64 GB of RAM DDR3 (@2.133 GHz); running Linux 4.4.0 (64 bits); 32K for both instruction and data L1 cache; 256K for L2 cache; 8192K for L3 cache. - -Compiler -------- - -gcc 7.4.0 - -`cmake .. -DCMAKE_BUILD_TYPE=Release -DUSE_SANITIZERS=OFF -DUSE_INTRINSICS=ON -DUSE_PDEP=ON` - - -Experiments ----------- - -- The file `space.md` reports the space breakdowns. -- The file `prefix_topk.md` reports the timing breakdowns for the prefix_topk step by varying the number of query terms. -- The file `conjunctive_topk.md` reports the timing breakdowns for the conjunctive_topk step by varying the number of query terms. -- The file `topk.md` reports the total time of the `topk` operation (combining the two steps, `prefix_topk` and `conjunctive_topk`) by varying the number of query terms. -- The file `fc_dictionary.md` reports on the `fc_dictionary` benchmark. -- The file `integer_fc_dictionary.md` reports on the `integer_fc_dictionary` benchmark. \ No newline at end of file diff --git a/results/conjunctive_topk.md b/results/conjunctive_topk.md deleted file mode 100644 index 3d9747b..0000000 --- a/results/conjunctive_topk.md +++ /dev/null @@ -1,107 +0,0 @@ -Conjunctive top-k ------------------ - -Executing queries shuffled at random, for k = 7. - -Average among 10 runs. - -From the last token of the query, we only retain the first character. This means that we spend less time obtaining the lexicographic range of the character (string comparisons are -very fast), but we spend more time on the RMQ phase, because the -range obtained from the completion trie can be very large.
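For intuition on the RMQ phase: since docIDs are assigned in decreasing score order, the minimum docID in a range of L is its best-scored completion, so the top-k can be extracted without scanning the whole range. Below is a minimal sketch of such an extraction, assuming only a hypothetical `rmq(lo, hi)` callback that returns the position of the minimum of `L` in the inclusive range `[lo, hi]`; the names are illustrative, not the exact interface of the library.

```cpp
#include <cstdint>
#include <functional>
#include <queue>
#include <tuple>
#include <vector>

// Top-k smallest values in L[lo, hi] (lo <= hi assumed), driven by a
// range-minimum structure: pop the best candidate, then split its range
// around the popped position and push the two halves.
template <typename RMQ>
std::vector<uint64_t> rmq_topk(std::vector<uint64_t> const& L, uint64_t lo,
                               uint64_t hi, uint32_t k, RMQ&& rmq) {
    typedef std::tuple<uint64_t, uint64_t, uint64_t, uint64_t>
        entry;  // (value, position, range_lo, range_hi)
    std::priority_queue<entry, std::vector<entry>, std::greater<entry>> q;
    uint64_t p = rmq(lo, hi);
    q.emplace(L[p], p, lo, hi);
    std::vector<uint64_t> topk;
    while (!q.empty() and topk.size() < k) {
        auto [val, pos, l, r] = q.top();
        q.pop();
        topk.push_back(val);
        if (pos > l) {  // left sub-range [l, pos - 1]
            uint64_t m = rmq(l, pos - 1);
            q.emplace(L[m], m, l, pos - 1);
        }
        if (pos < r) {  // right sub-range [pos + 1, r]
            uint64_t m = rmq(pos + 1, r);
            q.emplace(L[m], m, pos + 1, r);
        }
    }
    return topk;
}
```

In this sketch each reported result costs at most two further `rmq` calls plus one heap operation, regardless of the width of `[lo, hi]`.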
- -### AOL - -#### Solution 1 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "dictionary_search_ns_per_query": "3", "conjunctive_search_ns_per_query": "2896", "reporting_ns_per_query": "352"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "52", "dictionary_search_ns_per_query": "10", "conjunctive_search_ns_per_query": "2273", "reporting_ns_per_query": "2333"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "919", "dictionary_search_ns_per_query": "39", "conjunctive_search_ns_per_query": "20478", "reporting_ns_per_query": "1772"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1298", "dictionary_search_ns_per_query": "49", "conjunctive_search_ns_per_query": "27363", "reporting_ns_per_query": "974"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1857", "dictionary_search_ns_per_query": "42", "conjunctive_search_ns_per_query": "25484", "reporting_ns_per_query": "556"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "2239", "dictionary_search_ns_per_query": "34", "conjunctive_search_ns_per_query": "22070", "reporting_ns_per_query": "438"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2871", "dictionary_search_ns_per_query": "32", "conjunctive_search_ns_per_query": "18657", "reporting_ns_per_query": "465"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3774", "dictionary_search_ns_per_query": "30", "conjunctive_search_ns_per_query": "13967", "reporting_ns_per_query": "844"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "4463"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "6677"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "25503"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "31536"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "29973"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "27148"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "23630"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "20511"} - -If we do not check the forward index (thus erroneously reporting the first k docids of the intersection), we have: - - {"num_terms_per_query": "3", "num_queries": "50000", "conjunctive_search_ns_per_query": "10362"} - {"num_terms_per_query": "4", "num_queries": "50000", "conjunctive_search_ns_per_query": "21327"} - {"num_terms_per_query": "5", "num_queries": "50000", "conjunctive_search_ns_per_query": "23187"} - {"num_terms_per_query": "6", "num_queries": "50000", "conjunctive_search_ns_per_query": "21259"} - {"num_terms_per_query": "7", "num_queries": "50000", "conjunctive_search_ns_per_query": "18234"} - {"num_terms_per_query": "8+", "num_queries": "50000", "conjunctive_search_ns_per_query": "13912"} - -We can see that the time for the `conjunctive_search` remains the same, except for the case with 3 terms. -This suggests that the time needed to check the forward index is negligible compared to the one -needed to produce the intersection. This can also be observed by considering that the time for the case with 2 terms is very small: in this case we check the forward index for each doc in the inverted list of the first term.
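That per-document check can be sketched as follows. This is an illustrative stand-in modeled on the interfaces of the deleted headers (`iterator`, `has_next`, `intersects`), not the exact code: scan the inverted list of the lone prefix term and keep a docID only if the forward index reports that its completion contains some term in the suffix lexicographic range.

```cpp
#include <cstdint>
#include <vector>

// Sketch of the 2-term case: walk the posting list of the single prefix
// term and filter each docID through the forward index. `out` must be
// pre-sized to hold at least k entries.
template <typename InvertedIndex, typename ForwardIndex, typename Range>
uint32_t topk_single_term(InvertedIndex& inverted, ForwardIndex& forward,
                          uint64_t term_id, Range suffix_lex_range,
                          uint32_t k, std::vector<uint64_t>& out) {
    uint32_t results = 0;
    auto it = inverted.iterator(term_id - 1);  // term_ids start at 1
    for (; it.has_next() and results != k; ++it) {
        uint64_t doc_id = *it;
        // the probe whose cost the measurements above show to be negligible
        if (forward.intersects(doc_id, suffix_lex_range)) {
            out[results++] = doc_id;
        }
    }
    return results;
}
```

Since posting lists are scanned in increasing docID order and docIDs are assigned in decreasing score order, the first k docIDs that survive the check are already the top-k, so the `intersects` probe is the only extra cost per scanned document.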
- -#### Solution 2 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "dictionary_search_ns_per_query": "6", "conjunctive_search_ns_per_query": "3275", "reporting_ns_per_query": "330"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "109", "dictionary_search_ns_per_query": "36", "conjunctive_search_ns_per_query": "15770", "reporting_ns_per_query": "2485"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "932", "dictionary_search_ns_per_query": "52", "conjunctive_search_ns_per_query": "24290", "reporting_ns_per_query": "1780"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1388", "dictionary_search_ns_per_query": "55", "conjunctive_search_ns_per_query": "29056", "reporting_ns_per_query": "953"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1880", "dictionary_search_ns_per_query": "41", "conjunctive_search_ns_per_query": "26675", "reporting_ns_per_query": "541"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "2277", "dictionary_search_ns_per_query": "43", "conjunctive_search_ns_per_query": "22955", "reporting_ns_per_query": "421"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2762", "dictionary_search_ns_per_query": "37", "conjunctive_search_ns_per_query": "19437", "reporting_ns_per_query": "443"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3878", "dictionary_search_ns_per_query": "40", "conjunctive_search_ns_per_query": "14657", "reporting_ns_per_query": "814"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "4917"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "20361"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "28619"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "33140"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "30410"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "27477"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "24357"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "21042"} - -### MSN - -#### Solution 1 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "dictionary_search_ns_per_query": "6", "conjunctive_search_ns_per_query": "3021", "reporting_ns_per_query": "576"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "39", "dictionary_search_ns_per_query": "7", "conjunctive_search_ns_per_query": "2279", "reporting_ns_per_query": "1926"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "810", "dictionary_search_ns_per_query": "15", "conjunctive_search_ns_per_query": "12382", "reporting_ns_per_query": "1078"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1104", "dictionary_search_ns_per_query": "15", "conjunctive_search_ns_per_query": "13534", "reporting_ns_per_query": "526"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1737", "dictionary_search_ns_per_query": "11", "conjunctive_search_ns_per_query": "11424", "reporting_ns_per_query": "305"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "2049", "dictionary_search_ns_per_query": "10", "conjunctive_search_ns_per_query": "9565", "reporting_ns_per_query": "252"} - 
{"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2396", "dictionary_search_ns_per_query": "9", "conjunctive_search_ns_per_query": "8020", "reporting_ns_per_query": "324"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3431", "dictionary_search_ns_per_query": "9", "conjunctive_search_ns_per_query": "6199", "reporting_ns_per_query": "738"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "4982"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "6176"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "16236"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "17306"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "15591"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "13961"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "12980"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "12311"} - -#### Solution 2 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "dictionary_search_ns_per_query": "6", "conjunctive_search_ns_per_query": "3722", "reporting_ns_per_query": "511"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "56", "dictionary_search_ns_per_query": "20", "conjunctive_search_ns_per_query": "15134", "reporting_ns_per_query": "2043"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "835", "dictionary_search_ns_per_query": "20", "conjunctive_search_ns_per_query": "15310", "reporting_ns_per_query": "1072"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1117", "dictionary_search_ns_per_query": "19", "conjunctive_search_ns_per_query": "14672", "reporting_ns_per_query": "517"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1704", "dictionary_search_ns_per_query": "14", "conjunctive_search_ns_per_query": "12384", "reporting_ns_per_query": "300"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "2164", "dictionary_search_ns_per_query": "13", "conjunctive_search_ns_per_query": "10222", "reporting_ns_per_query": "246"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2567", "dictionary_search_ns_per_query": "12", "conjunctive_search_ns_per_query": "8579", "reporting_ns_per_query": "305"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3670", "dictionary_search_ns_per_query": "12", "conjunctive_search_ns_per_query": "6644", "reporting_ns_per_query": "714"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5667"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "19144"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "18886"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "18109"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "16030"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "14423"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "13418"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "12779"} \ No newline at end of file diff --git a/results/fc_dictionary.md b/results/fc_dictionary.md deleted file mode 100644 index 39e64b7..0000000 --- a/results/fc_dictionary.md +++ /dev/null @@ -1,40 +0,0 @@ -#### Results on 
the AOL querylog. - - pibiri@rubino:~/autocomplete/build$ ./benchmark_fc_dictionary ../test_data/aol/aol.completions 1000000 < ../test_data/aol/aol.completions.dict_queries.1M.shuffled - 2019-10-14 14:54:24: loading queries... - 2019-10-14 14:54:24: loaded 1000000 queries - 2019-10-14 14:54:24: building fc_dictionary with bucket size 4... - 2019-10-14 14:54:25: DONE - using 42938890 bytes - locate: 559.666 [ns/string] - extract: 165.846 [ns/string] - 2019-10-14 14:54:32: building fc_dictionary with bucket size 8... - 2019-10-14 14:54:33: DONE - using 38111527 bytes - locate: 515.359 [ns/string] - extract: 151.121 [ns/string] - 2019-10-14 14:54:40: building fc_dictionary with bucket size 16... - 2019-10-14 14:54:40: DONE - using 35270205 bytes - locate: 474.319 [ns/string] - extract: 138.07 [ns/string] - 2019-10-14 14:54:47: building fc_dictionary with bucket size 32... - 2019-10-14 14:54:47: DONE - using 33722303 bytes - locate: 490 [ns/string] - extract: 150.671 [ns/string] - 2019-10-14 14:54:54: building fc_dictionary with bucket size 64... - 2019-10-14 14:54:54: DONE - using 32910194 bytes - locate: 585.408 [ns/string] - extract: 197.131 [ns/string] - 2019-10-14 14:55:03: building fc_dictionary with bucket size 128... - 2019-10-14 14:55:03: DONE - using 32496375 bytes - locate: 812.441 [ns/string] - extract: 293.022 [ns/string] - 2019-10-14 14:55:15: building fc_dictionary with bucket size 256... - 2019-10-14 14:55:15: DONE - using 32286042 bytes - locate: 1283.83 [ns/string] - extract: 485.985 [ns/string] \ No newline at end of file diff --git a/results/integer_fc_dictionary.md b/results/integer_fc_dictionary.md deleted file mode 100644 index 955afe0..0000000 --- a/results/integer_fc_dictionary.md +++ /dev/null @@ -1,31 +0,0 @@ -#### Results on the AOL querylog. - - pibiri@rubino:~/autocomplete/build$ ./benchmark_integer_fc_dictionary ../test_data/aol/aol.completions 1000000 - 2019-10-14 15:28:12: building integer_fc_dictionary with bucket size 4... - 2019-10-14 15:28:14: DONE - using 129855836 bytes - extract: 102.787 [ns/string] - 2019-10-14 15:28:15: building integer_fc_dictionary with bucket size 8... - 2019-10-14 15:28:18: DONE - using 112779868 bytes - extract: 98.9981 [ns/string] - 2019-10-14 15:28:19: building integer_fc_dictionary with bucket size 16... - 2019-10-14 15:28:21: DONE - using 102740006 bytes - extract: 103.745 [ns/string] - 2019-10-14 15:28:22: building integer_fc_dictionary with bucket size 32... - 2019-10-14 15:28:24: DONE - using 97266766 bytes - extract: 136.042 [ns/string] - 2019-10-14 15:28:26: building integer_fc_dictionary with bucket size 64... - 2019-10-14 15:28:28: DONE - using 94397632 bytes - extract: 207.699 [ns/string] - 2019-10-14 15:28:30: building integer_fc_dictionary with bucket size 128... - 2019-10-14 15:28:32: DONE - using 92933198 bytes - extract: 354.622 [ns/string] - 2019-10-14 15:28:36: building integer_fc_dictionary with bucket size 256... 
- 2019-10-14 15:28:38: DONE - using 92192244 bytes - extract: 651.357 [ns/string] \ No newline at end of file diff --git a/results/inverted_index_space.md b/results/inverted_index_space.md deleted file mode 100644 index f3acd81..0000000 --- a/results/inverted_index_space.md +++ /dev/null @@ -1,19 +0,0 @@ -Inverted index compression ---- - -#### AOL - - EF -- 17.1495 bits per element - PEF uniform -- 16.5788 bits per element - PEF opt -- 15.0967 bits per element - PFOR -- 15.2661 bits per element - BIC -- 14.1396 bits per element - Simple9 -- 21.8895 bits per element - Simple16 -- 21.7385 bits per element - VByte -- 20.9531 bits per element - Varint -- 21.996 bits per element - Gamma -- 23.6305 bits per element - Delta -- 19.2088 bits per element - Rice -- 19.4145 bits per element - DINT single -- 15.4204 bits per element - DINT multi -- 15.084 bits per element \ No newline at end of file diff --git a/results/prefix_topk.md b/results/prefix_topk.md deleted file mode 100644 index 6404bc4..0000000 --- a/results/prefix_topk.md +++ /dev/null @@ -1,94 +0,0 @@ -Prefix top-k ------------ - -Executing queries shuffled at random, for k = 7. - -Average among 10 runs. - -From the last token of the query, we only retain the first character. This means that we spend less time obtaining the lexicographic range of the character (string comparisons are -very fast), but we spend more time on the RMQ phase, because the -range obtained from the completion trie can be very large. - -### AOL - -#### Solution 1 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "completions_search_ns_per_query": "279", "topk_rmq_ns_per_query": "2887", "reporting_ns_per_query": "317"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "47", "completions_search_ns_per_query": "853", "topk_rmq_ns_per_query": "576", "reporting_ns_per_query": "1851"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "706", "completions_search_ns_per_query": "945", "topk_rmq_ns_per_query": "95", "reporting_ns_per_query": "717"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1055", "completions_search_ns_per_query": "1057", "topk_rmq_ns_per_query": "22", "reporting_ns_per_query": "332"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1495", "completions_search_ns_per_query": "1215", "topk_rmq_ns_per_query": "9", "reporting_ns_per_query": "325"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "1957", "completions_search_ns_per_query": "1434", "topk_rmq_ns_per_query": "3", "reporting_ns_per_query": "425"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2410", "completions_search_ns_per_query": "1581", "topk_rmq_ns_per_query": "2", "reporting_ns_per_query": "611"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3360", "completions_search_ns_per_query": "1888", "topk_rmq_ns_per_query": "2", "reporting_ns_per_query": "913"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5027"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "4974"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "3984"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "4137"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "4660"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "5335"} -
{"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "5785"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "7394"} - -#### Solution 2 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "completions_search_ns_per_query": "493", "topk_rmq_ns_per_query": "3072", "reporting_ns_per_query": "628"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "60", "completions_search_ns_per_query": "1078", "topk_rmq_ns_per_query": "589", "reporting_ns_per_query": "1897"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "675", "completions_search_ns_per_query": "1053", "topk_rmq_ns_per_query": "96", "reporting_ns_per_query": "730"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1047", "completions_search_ns_per_query": "1081", "topk_rmq_ns_per_query": "21", "reporting_ns_per_query": "320"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1367", "completions_search_ns_per_query": "1112", "topk_rmq_ns_per_query": "8", "reporting_ns_per_query": "244"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "1886", "completions_search_ns_per_query": "1139", "topk_rmq_ns_per_query": "3", "reporting_ns_per_query": "300"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2242", "completions_search_ns_per_query": "1166", "topk_rmq_ns_per_query": "3", "reporting_ns_per_query": "455"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3229", "completions_search_ns_per_query": "1205", "topk_rmq_ns_per_query": "2", "reporting_ns_per_query": "809"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5768"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "5625"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "4389"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "4421"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "4830"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "5336"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "5963"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "7104"} - -### MSN - -#### Solution 1 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "completions_search_ns_per_query": "403", "topk_rmq_ns_per_query": "3211", "reporting_ns_per_query": "509"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "33", "completions_search_ns_per_query": "784", "topk_rmq_ns_per_query": "312", "reporting_ns_per_query": "1287"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "596", "completions_search_ns_per_query": "906", "topk_rmq_ns_per_query": "49", "reporting_ns_per_query": "423"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1026", "completions_search_ns_per_query": "1015", "topk_rmq_ns_per_query": "11", "reporting_ns_per_query": "206"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1434", "completions_search_ns_per_query": "1114", "topk_rmq_ns_per_query": "5", "reporting_ns_per_query": "217"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "1938", "completions_search_ns_per_query": "1273", "topk_rmq_ns_per_query": "2", "reporting_ns_per_query": "330"} - 
{"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2362", "completions_search_ns_per_query": "1437", "topk_rmq_ns_per_query": "0", "reporting_ns_per_query": "545"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3186", "completions_search_ns_per_query": "1737", "topk_rmq_ns_per_query": "1", "reporting_ns_per_query": "873"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5804"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "4006"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "3456"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "3873"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "4587"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "5030"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "5617"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "6957"} - -#### Solution 2 - - {"num_terms_per_query": "1", "num_queries": "50000", "parsing_ns_per_query": "0", "completions_search_ns_per_query": "697", "topk_rmq_ns_per_query": "3495", "reporting_ns_per_query": "1114"} - {"num_terms_per_query": "2", "num_queries": "50000", "parsing_ns_per_query": "32", "completions_search_ns_per_query": "1038", "topk_rmq_ns_per_query": "321", "reporting_ns_per_query": "1384"} - {"num_terms_per_query": "3", "num_queries": "50000", "parsing_ns_per_query": "547", "completions_search_ns_per_query": "1029", "topk_rmq_ns_per_query": "51", "reporting_ns_per_query": "455"} - {"num_terms_per_query": "4", "num_queries": "50000", "parsing_ns_per_query": "1012", "completions_search_ns_per_query": "1038", "topk_rmq_ns_per_query": "11", "reporting_ns_per_query": "210"} - {"num_terms_per_query": "5", "num_queries": "50000", "parsing_ns_per_query": "1318", "completions_search_ns_per_query": "1066", "topk_rmq_ns_per_query": "5", "reporting_ns_per_query": "172"} - {"num_terms_per_query": "6", "num_queries": "50000", "parsing_ns_per_query": "1922", "completions_search_ns_per_query": "1077", "topk_rmq_ns_per_query": "1", "reporting_ns_per_query": "242"} - {"num_terms_per_query": "7", "num_queries": "50000", "parsing_ns_per_query": "2213", "completions_search_ns_per_query": "1099", "topk_rmq_ns_per_query": "1", "reporting_ns_per_query": "425"} - {"num_terms_per_query": "8+", "num_queries": "50000", "parsing_ns_per_query": "3228", "completions_search_ns_per_query": "1124", "topk_rmq_ns_per_query": "0", "reporting_ns_per_query": "799"} - - {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "6772"} - {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "4646"} - {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "3831"} - {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "4108"} - {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "4594"} - {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "5080"} - {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "5621"} - {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "6775"} \ No newline at end of file diff --git a/results/space.md b/results/space.md deleted file mode 100644 index 64ac1a2..0000000 --- a/results/space.md +++ /dev/null @@ -1,159 +0,0 @@ -AOL 2006 query log ------------------- - -10,142,395 distinct queries, whose ids have been assigned -in decreasing frequency order (ties broken 
lexicographically). - -#### Solution 1 - - using 1.05555 [GiB] - completions: 0.520278 [GiB] (49.2899%) - unsorted docs list: 0.0409812 [GiB] (3.88246%) - unsorted minimal docs list: 0.0154568 [GiB] (1.46434%) - dictionary: 0.0328479 [GiB] (3.11194%) - inverted index: 0.144273 [GiB] (13.6681%) - data: 33.0401 [bpi] - pointers: 8.13526 [bpi] - forward index: 0.30171 [GiB] (28.5833%) - data: 42.6801 [bpi] - pointers: 42.8379 [bpi] - - - + Elias-Fano - using 0.370675 [GiB] - completions: 0.0867222 [GiB] (23.3958%) - unsorted docs list: 0.0409812 [GiB] (11.0558%) - unsorted minimal docs list: 0.0154568 [GiB] (4.1699%) - dictionary: 0.0328479 [GiB] (8.86166%) - inverted index: 0.0595939 [GiB] (16.0771%) - data: 15.7999 [bpi] - pointers: 1.20819 [bpi] - forward index: 0.135073 [GiB] (36.4397%) - data: 32.866 [bpi] - pointers: 5.41964 [bpi] - - + Elias-Fano and compact_forward_index - using 0.318008 [GiB] - completions: 0.0867222 [GiB] (27.2704%) - unsorted docs list: 0.0409812 [GiB] (12.8868%) - unsorted minimal docs list: 0.0154568 [GiB] (4.86049%) - dictionary: 0.0328479 [GiB] (10.3293%) - inverted index: 0.0595939 [GiB] (18.7397%) - data: 15.7999 [bpi] - pointers: 1.20819 [bpi] - forward index: 0.0824065 [GiB] (25.9133%) - data: 22 [bpi] - pointers: 1.35762 [bpi] - - + Elias-Fano and delta_forward_index - using 0.350595 [GiB] - completions: 0.086722 [GiB] (24.7356%) - unsorted docs list: 0.0409812 [GiB] (11.689%) - unsorted minimal docs list: 0.0154568 [GiB] (4.40872%) - dictionary: 0.0328479 [GiB] (9.36919%) - data: 69.9866 [bps] - pointers: 3.76476 [bps] - inverted index: 0.0595939 [GiB] (16.9979%) - data: 15.7999 [bpi] - pointers: 1.20819 [bpi] - forward index: 0.114994 [GiB] (32.7995%) - data: 29.6008 [bpi] - pointers: 2.99348 [bpi] - - + Elias-Fano + compact_forward_index + compact_unsorted_lists - using 0.304999 [GiB] - completions: 0.086722 [GiB] (28.4335%) - unsorted docs list: 0.0315353 [GiB] (10.3395%) - unsorted minimal docs list: 0.0118937 [GiB] (3.89958%) - dictionary: 0.0328479 [GiB] (10.7698%) - data: 69.9866 [bps] - pointers: 3.76476 [bps] - inverted index: 0.0595939 [GiB] (19.539%) - data: 15.7999 [bpi] - pointers: 1.20819 [bpi] - forward index: 0.0824065 [GiB] (27.0186%) - data: 22 [bpi] - pointers: 1.35762 [bpi] - -#### Solution 2 - - using 0.377843 [GiB] - completions: 0.0956838 [GiB] (25.3237%) - unsorted docs list: 0.0409812 [GiB] (10.8461%) - unsorted minimal docs list: 0.0154568 [GiB] (4.09079%) - dictionary: 0.0330574 [GiB] (8.74898%) - inverted index: 0.154881 [GiB] (40.9907%) - map from docid to lexid: 0.0377834 [GiB] (9.99975%) - - - + Elias-Fano - using 0.259893 [GiB] - completions: 0.0956841 [GiB] (36.8168%) - data: 73.5086 [bps] - pointers: 7.52944 [bps] - unsorted docs list: 0.0315353 [GiB] (12.134%) - unsorted minimal docs list: 0.0118937 [GiB] (4.57639%) - dictionary: 0.0328479 [GiB] (12.639%) - data: 69.9866 [bps] - pointers: 3.76476 [bps] - inverted index: 0.0595939 [GiB] (22.9302%) - data: 15.7999 [bpi] - pointers: 1.20819 [bpi] - map from docid to lexid: 0.0283376 [GiB] (10.9036%) - - -MSN 2006 query log ------------------- - -7,083,363 distinct queries, whose ids have been assigned -in decreasing frequency order (ties broken lexicographically). 
- -#### Solution 1 - - using 0.769592 [GiB] - completion trie: 0.370163 [GiB] (48.0986%) - unsorted docs list: 0.0286179 [GiB] (3.71858%) - unsorted minimal docs list: 0.0104689 [GiB] (1.36031%) - dictionary: 0.0220881 [GiB] (2.87011%) - inverted index: 0.107578 [GiB] (13.9785%) - forward index: 0.230677 [GiB] (29.9739%) - - + compression - using 0.213269 [GiB] - completions: 0.0617906 [GiB] (28.973%) - unsorted docs list: 0.0211964 [GiB] (9.9388%) - unsorted minimal docs list: 0.00775427 [GiB] (3.6359%) - dictionary: 0.0219463 [GiB] (10.2904%) - data: 68.9954 [bps] - pointers: 3.7648 [bps] - inverted index: 0.0429281 [GiB] (20.1286%) - data: 16.2938 [bpi] - pointers: 1.1785 [bpi] - forward index: 0.0576538 [GiB] (27.0333%) - data: 22 [bpi] - pointers: 1.35605 [bpi] - -#### Solution 2 - - using 0.263256 [GiB] - completions: 0.0681158 [GiB] (25.8744%) - unsorted docs list: 0.0286179 [GiB] (10.8708%) - unsorted minimal docs list: 0.0104689 [GiB] (3.97669%) - dictionary: 0.0220881 [GiB] (8.39036%) - inverted index: 0.107578 [GiB] (40.8643%) - map from docid to lexid: 0.0263876 [GiB] (10.0236%) - - + compression - using 0.180907 [GiB] - completions: 0.0681161 [GiB] (37.6525%) - data: 75.0743 [bps] - pointers: 7.52946 [bps] - unsorted docs list: 0.0211964 [GiB] (11.7167%) - unsorted minimal docs list: 0.00775427 [GiB] (4.28633%) - dictionary: 0.0219463 [GiB] (12.1312%) - data: 68.9954 [bps] - pointers: 3.7648 [bps] - inverted index: 0.0429281 [GiB] (23.7293%) - data: 16.2938 [bpi] - pointers: 1.1785 [bpi] - map from docid to lexid: 0.0189661 [GiB] (10.4839%) \ No newline at end of file diff --git a/results/topk.md b/results/topk.md deleted file mode 100644 index b101b43..0000000 --- a/results/topk.md +++ /dev/null @@ -1,201 +0,0 @@ -Top-k ------------------ - -Executing queries shuffled at random, for k = 7. - -Average among 10 runs. 
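The timings below measure the combination of the two steps. A condensed sketch of the control flow, mirroring the deleted `autocomplete3::topk` shown earlier (the two `*_count` helpers are hypothetical wrappers, not functions of the library):

```cpp
#include <cstdint>
#include <string>

// Run the cheap prefix step first; fall back to the more expensive
// conjunctive step only when fewer than k completions were found.
// `Index` stands in for any of the autocomplete* classes.
template <typename Index>
uint32_t combined_topk(Index& index, std::string const& query, uint32_t k) {
    uint32_t num = index.prefix_topk_count(query, k);           // step (1)
    if (num < k) num = index.conjunctive_topk_count(query, k);  // step (2)
    return num;
}
```

The conjunctive step, which the numbers below show to dominate for queries of three or more terms, is thus skipped whenever the prefix step already fills the k slots.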
-
-### AOL
-
-#### Solution 1
-
-    {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5062"}
-    {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "6725"}
-    {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "24960"}
-    {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "32761"}
-    {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "31450"}
-    {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "28812"}
-    {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "25978"}
-    {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "22785"}
-
-    + Elias-Fano
-    {"num_terms_per_query": "1", "num_queries": "10000", "ns_per_query": "5614"}
-    {"num_terms_per_query": "2", "num_queries": "10000", "ns_per_query": "9767"}
-    {"num_terms_per_query": "3", "num_queries": "10000", "ns_per_query": "26999"}
-    {"num_terms_per_query": "4", "num_queries": "10000", "ns_per_query": "35428"}
-    {"num_terms_per_query": "5", "num_queries": "10000", "ns_per_query": "36073"}
-    {"num_terms_per_query": "6", "num_queries": "10000", "ns_per_query": "31718"}
-    {"num_terms_per_query": "7", "num_queries": "10000", "ns_per_query": "29992"}
-    {"num_terms_per_query": "8+", "num_queries": "10000", "ns_per_query": "27313"}
-
-    + Elias-Fano and forward_index2
-    {"num_terms_per_query": "1", "num_queries": "10000", "ns_per_query": "5336"}
-    {"num_terms_per_query": "2", "num_queries": "10000", "ns_per_query": "7573"}
-    {"num_terms_per_query": "3", "num_queries": "10000", "ns_per_query": "26278"}
-    {"num_terms_per_query": "4", "num_queries": "10000", "ns_per_query": "35664"}
-    {"num_terms_per_query": "5", "num_queries": "10000", "ns_per_query": "35189"}
-    {"num_terms_per_query": "6", "num_queries": "10000", "ns_per_query": "32033"}
-    {"num_terms_per_query": "7", "num_queries": "10000", "ns_per_query": "29950"}
-    {"num_terms_per_query": "8+", "num_queries": "10000", "ns_per_query": "27332"}
-
-#### Solution 2
-
-    {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5812"}
-    {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "12703"}
-    {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "27307"}
-    {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "33476"}
-    {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "31403"}
-    {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "28718"}
-    {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "25728"}
-    {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "22419"}
-
-    + Elias-Fano
-    {"num_terms_per_query": "1", "num_queries": "10000", "ns_per_query": "5609"}
-    {"num_terms_per_query": "2", "num_queries": "10000", "ns_per_query": "10894"}
-    {"num_terms_per_query": "3", "num_queries": "10000", "ns_per_query": "27311"}
-    {"num_terms_per_query": "4", "num_queries": "10000", "ns_per_query": "34780"}
-    {"num_terms_per_query": "5", "num_queries": "10000", "ns_per_query": "33849"}
-    {"num_terms_per_query": "6", "num_queries": "10000", "ns_per_query": "30319"}
-    {"num_terms_per_query": "7", "num_queries": "10000", "ns_per_query": "28181"}
-    {"num_terms_per_query": "8+", "num_queries": "10000", "ns_per_query": "24757"}
-
-#### Solution 3
-
-    {"num_terms_per_query": "1", "num_queries": "1000", "ns_per_query": "5899"}
-    {"num_terms_per_query": "2", "num_queries": "1000", "ns_per_query": "12282007"}
-    {"num_terms_per_query": "3", "num_queries": "1000", "ns_per_query": "18393403"}
-    {"num_terms_per_query": "4", "num_queries": "1000", "ns_per_query": "15212918"}
-    {"num_terms_per_query": "5", "num_queries": "1000", "ns_per_query": "11852012"}
-    {"num_terms_per_query": "6", "num_queries": "1000", "ns_per_query": "7781194"}
-    {"num_terms_per_query": "7", "num_queries": "1000", "ns_per_query": "7939661"}
-    {"num_terms_per_query": "8+", "num_queries": "1000", "ns_per_query": "6980226"}
-
-    + Elias-Fano
-    {"num_terms_per_query": "1", "num_queries": "1000", "ns_per_query": "6024"}
-    {"num_terms_per_query": "2", "num_queries": "1000", "ns_per_query": "20553345"}
-    {"num_terms_per_query": "3", "num_queries": "1000", "ns_per_query": "32495295"}
-    {"num_terms_per_query": "4", "num_queries": "1000", "ns_per_query": "30929833"}
-    {"num_terms_per_query": "5", "num_queries": "1000", "ns_per_query": "27103519"}
-    {"num_terms_per_query": "6", "num_queries": "1000", "ns_per_query": "19912460"}
-    {"num_terms_per_query": "7", "num_queries": "1000", "ns_per_query": "20956205"}
-    {"num_terms_per_query": "8+", "num_queries": "1000", "ns_per_query": "19643570"}
-
-#### Solution 4
-
-    c = 0.005
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6593"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "756944"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "2188766"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "1920720"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "2398355"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "1711205"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "2195672"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "2115028"}
-
-    c = 0.01
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6610"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "739838"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "2147339"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "1988980"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "2440435"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "1858965"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "2304761"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "2254481"}
-
-    c = 0.01, + Elias-Fano
-    {"num_terms_per_query": "1", "num_queries": "1000", "ns_per_query": "5879"}
-    {"num_terms_per_query": "2", "num_queries": "1000", "ns_per_query": "1754176"}
-    {"num_terms_per_query": "3", "num_queries": "1000", "ns_per_query": "3435481"}
-    {"num_terms_per_query": "4", "num_queries": "1000", "ns_per_query": "4442784"}
-    {"num_terms_per_query": "5", "num_queries": "1000", "ns_per_query": "4946228"}
-    {"num_terms_per_query": "6", "num_queries": "1000", "ns_per_query": "4818169"}
-    {"num_terms_per_query": "7", "num_queries": "1000", "ns_per_query": "5157776"}
-    {"num_terms_per_query": "8+", "num_queries": "1000", "ns_per_query": "5431935"}
-
-    c = 0.025
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6528"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "828082"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "2422803"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "2482018"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "2970064"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "2542134"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "2972710"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "2924603"}
-
-    c = 0.05
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6508"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "1059938"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "3046716"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "3528723"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "4037290"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "3850329"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "4371489"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "4648349"}
-
-    c = 0.1
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6584"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "1600869"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "4501125"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "5562030"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "6634491"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "6768321"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "7124462"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "7733525"}
-
-    c = 0.2
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "6589"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "2831409"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "7641806"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "9881857"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "11138148"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "11643908"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "11966417"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "12460833"}
-
-### MSN
-
-#### Solution 1
-
-    {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "5823"}
-    {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "6251"}
-    {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "16502"}
-    {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "18380"}
-    {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "17044"}
-    {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "15622"}
-    {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "14709"}
-    {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "14323"}
-
-#### Solution 2
-
-    {"num_terms_per_query": "1", "num_queries": "50000", "ns_per_query": "6837"}
-    {"num_terms_per_query": "2", "num_queries": "50000", "ns_per_query": "14469"}
-    {"num_terms_per_query": "3", "num_queries": "50000", "ns_per_query": "18670"}
-    {"num_terms_per_query": "4", "num_queries": "50000", "ns_per_query": "19144"}
-    {"num_terms_per_query": "5", "num_queries": "50000", "ns_per_query": "17109"}
-    {"num_terms_per_query": "6", "num_queries": "50000", "ns_per_query": "15738"}
-    {"num_terms_per_query": "7", "num_queries": "50000", "ns_per_query": "14810"}
-    {"num_terms_per_query": "8+", "num_queries": "50000", "ns_per_query": "14260"}
-
-
-#### Solution 3
-
-
-    {"num_terms_per_query": "1", "num_queries": "1000", "ns_per_query": "6666"}
-    {"num_terms_per_query": "2", "num_queries": "1000", "ns_per_query": "6635754"}
-    {"num_terms_per_query": "3", "num_queries": "1000", "ns_per_query": "8612266"}
-    {"num_terms_per_query": "4", "num_queries": "1000", "ns_per_query": "5290905"}
-    {"num_terms_per_query": "5", "num_queries": "1000", "ns_per_query": "3939319"}
-    {"num_terms_per_query": "6", "num_queries": "1000", "ns_per_query": "3035556"}
-    {"num_terms_per_query": "7", "num_queries": "1000", "ns_per_query": "3106875"}
-    {"num_terms_per_query": "8+", "num_queries": "1000", "ns_per_query": "3089917"}
-
-#### Solution 4 with c = 0.1
-
-    {"num_terms_per_query": "1", "num_queries": "100", "ns_per_query": "7496"}
-    {"num_terms_per_query": "2", "num_queries": "100", "ns_per_query": "1280652"}
-    {"num_terms_per_query": "3", "num_queries": "100", "ns_per_query": "3181191"}
-    {"num_terms_per_query": "4", "num_queries": "100", "ns_per_query": "3722226"}
-    {"num_terms_per_query": "5", "num_queries": "100", "ns_per_query": "4056810"}
-    {"num_terms_per_query": "6", "num_queries": "100", "ns_per_query": "4130288"}
-    {"num_terms_per_query": "7", "num_queries": "100", "ns_per_query": "4282750"}
-    {"num_terms_per_query": "8+", "num_queries": "100", "ns_per_query": "4205507"}
\ No newline at end of file
diff --git a/script/collect_results.py b/script/collect_results.py
deleted file mode 100644
index 9d0dd22..0000000
--- a/script/collect_results.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import sys, os
-
-type = sys.argv[1]
-exe = sys.argv[2] # prefix_top, conjunctive_topk, topk
-dataset_name = sys.argv[3]
-k = sys.argv[4]
-num_queries = sys.argv[5]
-collect_breakdowns = int(sys.argv[6]) # 0 or 1
-
-breakdown = ""
-if collect_breakdowns != 0:
-    breakdown = "--breakdown"
-
-output_filename = dataset_name + "." + exe + ".timings.json"
-
-for i in range(1, 8):
-    os.system("../build/benchmark_" + exe + " " + type + " " + k + " ../build/" + dataset_name + ".bin " + str(i) + " " + str(num_queries) + " " + breakdown + " < ../test_data/" + dataset_name + "/" + dataset_name + ".completions.length=" + str(i) + ".shuffled 2>> " + output_filename)
-os.system("../build/benchmark_" + exe + " " + type + " " + k + " ../build/" + dataset_name + ".bin 8+ " + str(num_queries) + " " + breakdown + " < ../test_data/" + dataset_name + "/" + dataset_name + ".completions.length=8+.shuffled 2>> " + output_filename)
diff --git a/script/collect_results_by_varying_percentage.py b/script/collect_results_by_varying_percentage.py
deleted file mode 100644
index cc1b9a0..0000000
--- a/script/collect_results_by_varying_percentage.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import sys, os
-
-type = sys.argv[1]
-index_filename = sys.argv[2]
-dataset_name = sys.argv[3]
-k = sys.argv[4]
-num_queries = sys.argv[5]
-collect_breakdowns = int(sys.argv[6]) # 0 or 1
-
-output_filename = dataset_name + "." + type
-
-breakdown = ""
-if collect_breakdowns != 0:
-    breakdown = "--breakdown"
-    output_filename += ".breakdown"
-
-output_filename += ".topk.timings.json"
-
-percentages = ["0.0", "0.25", "0.50", "0.75"]
-
-for perc in percentages:
-    for terms in range(2,8): # (1,8)
-        os.system("../build/benchmark_topk " + type + " " + k + " ../build/" + index_filename + " " + str(terms) + " " + str(num_queries) + " " + perc + " " + breakdown + " < ../test_data/" + dataset_name + "/" + dataset_name + ".completions.length=" + str(terms) + ".shuffled 2>> " + output_filename)
-    os.system("../build/benchmark_topk " + type + " " + k + " ../build/" + index_filename + " 8+ " + str(num_queries) + " " + perc + " " + breakdown + " < ../test_data/" + dataset_name + "/" + dataset_name + ".completions.length=8+.shuffled 2>> " + output_filename)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
deleted file mode 100644
index 0687354..0000000
--- a/test/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_executable(test_completion_trie test_completion_trie.cpp)
-add_executable(test_fc_dictionary test_fc_dictionary.cpp)
-add_executable(test_integer_fc_dictionary test_integer_fc_dictionary.cpp)
-add_executable(test_cartesian_tree test_cartesian_tree.cpp)
-add_executable(test_inverted_index test_inverted_index.cpp)
-add_executable(test_forward_index test_forward_index.cpp)
-add_executable(test_unsorted_list test_unsorted_list.cpp)
-add_executable(test_autocomplete test_autocomplete.cpp)
-add_executable(test_locate_prefix test_locate_prefix.cpp)
-add_executable(test_blocked_inverted_index test_blocked_inverted_index.cpp)
\ No newline at end of file
diff --git a/test/test_autocomplete.cpp b/test/test_autocomplete.cpp
deleted file mode 100644
index d4fcefa..0000000
--- a/test/test_autocomplete.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-#include "statistics.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    // typedef uncompressed_autocomplete_type index_type;
-    // typedef ef_autocomplete_type index_type;
-    typedef ef_autocomplete_type2 index_type;
-
-    {
-        index_type index(params);
-        if (output_filename) {
-            essentials::logger("saving data structure to disk...");
-            essentials::save(index, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            index_type index;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(index, output_filename);
-            essentials::logger("DONE");
-            index.print_stats();
-
-            {
-                essentials::logger("testing prefix_topk()...");
-                uint32_t k = 7;
-                std::vector<std::string> queries = {
-                    "a",        "10",          "african",
-                    "air",      "commercial",  "internet",
-                    "paris",    "somerset",    "the",
-                    "the new",  "the perfect", "the starting line",
-                    "yu gi oh", "for sale",    "dave mat",
-                    "florence", "florida be",  "for s",
-                    "for sa",   "for sal",     "for sale",
-                    "ford a",   "ford au",     "ford m",
-                    "ford mu",  "for",         "fo",
-                    "f",        "matt",        "fl",
-                    "florir",   "fly",         "the starting l",
-                    "floridaaa"};
-
-                for (auto& query : queries) {
-                    auto it = index.prefix_topk(query, k);
-                    std::cout << "top-" << it.size() << " completions for '"
-                              << query << "':\n";
-                    for (uint32_t i = 0; i != it.size(); ++i, ++it) {
-                        auto completion = *it;
-                        std::cout << "(" << completion.score << ", '";
-                        print(completion.string);
-                        std::cout << "')" << std::endl;
-                    }
-                }
-
-                essentials::logger("DONE");
-            }
-
-            {
-                essentials::logger("testing conjunctive_topk()...");
-                uint32_t k = 7;
-                std::vector<std::string> queries = {
-                    "dave mat", "florence", "florida be", "for s",
-                    "for sa",   "for sal",  "for sale",   "ford a",
-                    "ford au",  "ford m",   "ford mu",    "for",
-                    "fo",       "f",        "matt",       "fl",
-                    "flor",     "fly",      "the starting l"};
-
-                for (auto& query : queries) {
-                    auto it = index.conjunctive_topk(query, k);
-                    std::cout << "top-" << it.size() << " completions for '"
-                              << query << "':\n";
-                    for (uint32_t i = 0; i != it.size(); ++i, ++it) {
-                        auto completion = *it;
-                        std::cout << "(" << completion.score << ", '";
-                        print(completion.string);
-                        std::cout << "')" << std::endl;
-                    }
-                }
-
-                essentials::logger("DONE");
-            }
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_blocked_inverted_index.cpp b/test/test_blocked_inverted_index.cpp
deleted file mode 100644
index 94fc274..0000000
--- a/test/test_blocked_inverted_index.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-    const float c = 0.01;
-
-    {
-        // build, print and write
-        ef_blocked_inverted_index::builder builder(params, c);
-        ef_blocked_inverted_index bii;
-        builder.build(bii);
-        std::cout << "using " << bii.bytes() << " bytes" << std::endl;
-        std::cout << "num docs " << bii.num_docs() << std::endl;
-        std::cout << "num terms " << bii.num_terms() << std::endl;
-    }
-
-    return 0;
-}
diff --git a/test/test_cartesian_tree.cpp b/test/test_cartesian_tree.cpp
deleted file mode 100644
index 0c4fd38..0000000
--- a/test/test_cartesian_tree.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <iostream>
-#include <fstream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    {
-        // build and write
-
-        // std::vector<id_type> doc_ids = {23, 2, 4, 0, 88, 23, 2, 4, 55, 3,
-        //                                 7, 6, 90, 34, 2, 3, 1, 12, 23};
-
-        std::vector<id_type> doc_ids;
-        doc_ids.reserve(params.num_completions);
-        std::ifstream input(params.collection_basename + ".mapped",
-                            std::ios_base::in);
-        if (!input.good()) {
-            throw std::runtime_error("File not found");
-        }
-        completion_iterator it(params, input);
-        while (input) {
-            auto const& record = *it;
-            doc_ids.push_back(record.doc_id);
-            ++it;
-        }
-        input.close();
-
-        cartesian_tree rmq;
-        rmq.build(doc_ids, std::less<id_type>());
-        assert(rmq.size() == doc_ids.size());
-        std::cout << "using " << rmq.bytes() << " bytes" << std::endl;
-
-        if (output_filename) {
-            // essentials::print_size(rmq);
-            essentials::logger("saving data structure to disk...");
-            essentials::save(rmq, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        // load and print
-        if (output_filename) {
-            cartesian_tree rmq;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(rmq, output_filename);
-            essentials::logger("DONE");
-
-            std::cout << "using " << rmq.bytes() << " bytes" << std::endl;
-
-            for (size_t i = 0; i != rmq.size(); ++i) {
-                for (size_t j = i; j != rmq.size(); ++j) {
-                    std::cout << "rmq[" << i << "," << j
-                              << "] = " << rmq.rmq(i, j) << std::endl;
-                }
-            }
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_completion_trie.cpp b/test/test_completion_trie.cpp
deleted file mode 100644
index 1aba989..0000000
--- a/test/test_completion_trie.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-#include "statistics.hpp"
-
-using namespace autocomplete;
-
-struct completion_comparator {
-    bool operator()(completion_type const& lhs,
-                    completion_type const& rhs) const {
-        size_t l = 0;  // |lcp(lhs,rhs)|
-        while (l < lhs.size() - 1 and l < rhs.size() - 1 and lhs[l] == rhs[l]) {
-            ++l;
-        }
-        return lhs[l] < rhs[l];
-    }
-};
-
-range locate_prefix(std::vector<completion_type> const& completions,
-                    completion_type const& c) {
-    completion_comparator comp;
-    auto b = std::lower_bound(completions.begin(), completions.end(), c, comp);
-    uint64_t begin = std::distance(completions.begin(), b);
-    auto e = std::upper_bound(completions.begin() + begin, completions.end(), c,
-                              comp);
-    uint64_t end = std::distance(completions.begin(), e);
-    return {begin, end};
-}
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    // typedef uint64_completion_trie completion_trie_type;
-    typedef ef_completion_trie completion_trie_type;
-
-    {
-        completion_trie_type::builder builder(params);
-        completion_trie_type ct;
-        builder.build(ct);
-        ct.print_stats();
-
-        if (output_filename) {
-            essentials::logger("saving data structure to disk...");
-            essentials::save(ct, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            completion_trie_type ct;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(ct, output_filename);
-            essentials::logger("DONE");
-            // essentials::print_size(ct);
-            std::cout << "using " << ct.bytes() << " bytes" << std::endl;
-
-            std::vector<completion_type> completions;
-            completions.reserve(params.num_completions);
-            std::ifstream input(params.collection_basename + ".mapped",
-                                std::ios_base::in);
-            if (!input.good()) {
-                throw std::runtime_error("File not found");
-            }
-
-            completion_iterator it(params, input);
-            while (input) {
-                auto& record = *it;
-                completions.push_back(std::move(record.completion));
-                ++it;
-            }
-            input.close();
-
-            // check all completions
-            essentials::logger("testing is_member()...");
-            for (auto const& c : completions) {
-                if (!ct.is_member(c)) {
-                    print_completion(c);
-                    std::cout << " not found!" << std::endl;
-                    return 1;
-                }
-            }
-            essentials::logger("DONE...");
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_fc_dictionary.cpp b/test/test_fc_dictionary.cpp
deleted file mode 100644
index 3f79d1e..0000000
--- a/test/test_fc_dictionary.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-id_type locate(std::vector<std::string> const& terms, std::string const& t) {
-    return std::distance(terms.begin(),
-                         std::lower_bound(terms.begin(), terms.end(), t)) +
-           1;
-}
-
-range locate_prefix(std::vector<std::string> const& terms,
-                    std::string const& p) {
-    auto comp_l = [](std::string const& l, std::string const& r) {
-        if (l.size() < r.size()) {
-            return strncmp(l.c_str(), r.c_str(), l.size()) <= 0;
-        }
-        return strcmp(l.c_str(), r.c_str()) < 0;
-    };
-
-    auto comp_r = [](std::string const& l, std::string const& r) {
-        if (l.size() < r.size()) {
-            return strncmp(l.c_str(), r.c_str(), l.size()) < 0;
-        }
-        return strcmp(l.c_str(), r.c_str()) < 0;
-    };
-
-    range r;
-    r.begin = std::distance(
-        terms.begin(), std::lower_bound(terms.begin(), terms.end(), p, comp_l));
-    r.end =
-        std::distance(terms.begin(),
-                      std::upper_bound(terms.begin(), terms.end(), p, comp_r)) -
-        1;
-
-    return r;
-}
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    {
-        // build, print and write
-        fc_dictionary_type::builder builder(params);
-        fc_dictionary_type dict;
-        builder.build(dict);
-        std::cout << "using " << dict.bytes() << " bytes" << std::endl;
-
-        if (output_filename) {
-            // essentials::print_size(dict);
-            essentials::logger("saving data structure to disk...");
-            essentials::save(dict, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            fc_dictionary_type dict;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(dict, output_filename);
-            essentials::logger("DONE");
-            // essentials::print_size(dict);
-            std::cout << "using " << dict.bytes() << " bytes" << std::endl;
-
-            // test locate() and extract for all strings
-            std::vector<std::string> terms;
-            terms.reserve(params.num_terms);
-            std::ifstream input((params.collection_basename + ".dict").c_str(),
-                                std::ios_base::in);
-            if (!input.good()) {
-                throw std::runtime_error("File not found");
-            }
-            std::string term;
-            term.reserve(256 + 1);
-            input >> term;
-            while (input) {
-                terms.push_back(std::move(term));
-                input >> term;
-            }
-            input.close();
-
-            std::cout << "terms.size() " << terms.size() << std::endl;
-
-            std::vector<uint8_t> decoded(2 *
-                                         constants::MAX_NUM_CHARS_PER_QUERY);
-
-            for (auto const& t : terms) {
-                id_type expected = locate(terms, t);
-                id_type got = dict.locate(string_to_byte_range(t));
-
-                std::cout << "locating term '" << t << "'" << std::endl;
-                if (got != expected) {
-                    std::cout << "Error: expected id " << expected << ","
-                              << " but got id " << got << std::endl;
-                    return 1;
-                }
-
-                std::cout << "extracting term '" << t << "'" << std::endl;
-                uint8_t string_len = dict.extract(got, decoded.data());
-
-                if (string_len != t.size()) {
-                    std::cout << "Error: expected size " << t.size() << ","
-                              << " but got size " << string_len << std::endl;
-                    return 1;
-                }
-
-                auto s = reinterpret_cast<char const*>(decoded.data());
-                for (uint8_t i = 0; i != string_len; ++i) {
-                    if (t[i] != s[i]) {
-                        std::cout << "Error: expected char " << t[i]
-                                  << " but got " << s[i] << std::endl;
-                        return 1;
-                    }
-                }
-
-                std::cout << "lexicographic id of '" << t << "' is " << got
-                          << std::endl;
-            }
-
-            // test locate_prefix() for all strings
-            std::string prefix;
-            prefix.reserve(256 + 1);
-            for (auto const& t : terms) {
-                uint32_t n = t.size();
-                for (uint32_t prefix_len = 1; prefix_len <= n; ++prefix_len) {
-                    prefix.clear();
-                    for (uint32_t i = 0; i != prefix_len; ++i) {
-                        prefix.push_back(t[i]);
-                    }
-
-                    std::cout << "locating prefix '" << prefix << "'"
-                              << std::endl;
-                    range expected = locate_prefix(terms, prefix);
-                    range got =
-                        dict.locate_prefix(string_to_byte_range(prefix));
-
-                    if ((got.begin != expected.begin) or
-                        (got.end != expected.end)) {
-                        std::cout << "Error for prefix '" << prefix
-                                  << "' : expected [" << expected.begin << ","
-                                  << expected.end << "] but got [" << got.begin
-                                  << "," << got.end << "]" << std::endl;
-                        return 1;
-                    }
-
-                    std::cout << "prefix range of '" << prefix << "' is ["
-                              << got.begin << "," << got.end << "]"
-                              << std::endl;
-                }
-            }
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_forward_index.cpp b/test/test_forward_index.cpp
deleted file mode 100644
index 576215d..0000000
--- a/test/test_forward_index.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    typedef compact_forward_index forward_index_type;
-
-    {
-        forward_index_type::builder builder(params);
-        forward_index_type index;
-        builder.build(index);
-        std::cout << "using " << index.bytes() << " bytes" << std::endl;
-        std::cout << "num docs " << index.num_docs() << std::endl;
-        std::cout << "num terms " << index.num_terms() << std::endl;
-
-        if (output_filename) {
-            essentials::logger("saving data structure to disk...");
-            essentials::save(index, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            forward_index_type index;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(index, output_filename);
-            essentials::logger("DONE");
-            std::cout << "using " << index.bytes() << " bytes" << std::endl;
-            std::cout << "num docs " << index.num_docs() << std::endl;
-            std::cout << "num terms " << index.num_terms() << std::endl;
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_integer_fc_dictionary.cpp b/test/test_integer_fc_dictionary.cpp
deleted file mode 100644
index 4f78052..0000000
--- a/test/test_integer_fc_dictionary.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    {
-        // build, print and write
-        integer_fc_dictionary_type::builder builder(params);
-        integer_fc_dictionary_type dict;
-        builder.build(dict);
-        std::cout << "using " << dict.bytes() << " bytes" << std::endl;
-
-        if (output_filename) {
-            // essentials::print_size(dict);
-            essentials::logger("saving data structure to disk...");
-            essentials::save(dict, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            integer_fc_dictionary_type dict;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(dict, output_filename);
-            essentials::logger("DONE");
-            std::cout << "using " << dict.bytes() << " bytes" << std::endl;
-
-            {
-                essentials::logger("testing extract() and locate()...");
-                std::ifstream input(
-                    (params.collection_basename + ".mapped").c_str(),
-                    std::ios_base::in);
-                completion_iterator it(params, input);
-
-                completion_type decoded(2 * constants::MAX_NUM_TERMS_PER_QUERY);
-                for (id_type id = 0; id != params.num_completions; ++id, ++it) {
-                    auto const& expected = (*it).completion;
-                    assert(expected.size() > 0);
-                    uint8_t size = dict.extract(id, decoded);
-                    if (expected.size() - 1 != size) {
-                        std::cout << "Error in decoding the " << id
-                                  << "-th string: expected size "
-                                  << expected.size() - 1 << ","
-                                  << " but got size " << int(size) << std::endl;
-                        return 1;
-                    }
-
-                    for (uint8_t i = 0; i != size; ++i) {
-                        if (decoded[i] != expected[i]) {
-                            std::cout
-                                << "Error in decoding the " << id
-                                << "-th string: expected " << expected[i] << ","
-                                << " but got " << decoded[i] << " at position "
-                                << int(i) << std::endl;
-                            return 1;
-                        }
-                    }
-
-                    id_type got_id =
-                        dict.locate({decoded.data(), decoded.data() + size});
-                    if (got_id != id) {
-                        std::cout << "Error in locating the " << id
-                                  << "-th string: expected id " << id << ","
-                                  << " but got id " << got_id << std::endl;
-                        return 1;
-                    }
-                }
-
-                input.close();
-                essentials::logger("it's all good");
-            }
-
-            // {
-            //     uint64_completion_trie::builder builder(params);
-            //     uint64_completion_trie ct;
-            //     builder.build(ct);
-            //     std::cout << "using " << ct.bytes() << " bytes" << std::endl;
-
-            //     essentials::logger("testing locate_prefix()...");
-
-            //     std::ifstream input(
-            //         (params.collection_basename + ".mapped").c_str(),
-            //         std::ios_base::in);
-            //     completion_iterator it(params, input);
-
-            //     uint32_t num_checks =
-            //         std::min<uint32_t>(params.num_completions, 30000);
-
-            //     completion_type prefix;
-            //     for (uint32_t i = 0; i != num_checks; ++i, ++it) {
-            //         auto const& expected = (*it).completion;
-            //         assert(expected.size() > 0);
-
-            //         for (uint32_t prefix_len = 1;
-            //              prefix_len <= expected.size() - 1; ++prefix_len) {
-            //             prefix.clear();
-            //             for (uint32_t i = 0; i != prefix_len; ++i) {
-            //                 prefix.push_back(expected[i]);
-            //             }
-
-            //             range expected = ct.locate_prefix(prefix);
-            //             range got = dict.locate_prefix(
-            //                 completion_to_uint32_range(prefix));
-
-            //             if ((got.begin != expected.begin) or
-            //                 (got.end != expected.end - 1)) {
-            //                 std::cout << "Error for prefix ";
-            //                 print_completion(prefix);
-            //                 std::cout << ": expected [" << expected.begin << ","
-            //                           << expected.end - 1 << "] but got ["
-            //                           << got.begin << "," << got.end << "]"
-            //                           << std::endl;
-            //                 return 1;
-            //             }
-
-            //             // std::cout << "prefix range of ";
-            //             // print_completion(prefix);
-            //             // std::cout << " is [" << got.begin << "," << got.end
-            //             //           << "]" << std::endl;
-            //         }
-            //     }
-
-            //     input.close();
-            //     essentials::logger("it's all good");
-            // }
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_inverted_index.cpp b/test/test_inverted_index.cpp
deleted file mode 100644
index 81f913e..0000000
--- a/test/test_inverted_index.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    typedef ef_inverted_index inverted_index_type;
-
-    {
-        // build, print and write
-        inverted_index_type::builder builder(params);
-        inverted_index_type index;
-        builder.build(index);
-        std::cout << "using " << index.bytes() << " bytes" << std::endl;
-        std::cout << "num docs " << index.num_docs() << std::endl;
-        std::cout << "num terms " << index.num_terms() << std::endl;
-
-        if (output_filename) {
-            essentials::logger("saving data structure to disk...");
-            essentials::save(index, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            inverted_index_type index;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(index, output_filename);
-            essentials::logger("DONE");
-            std::cout << "using " << index.bytes() << " bytes" << std::endl;
-            std::cout << "num docs " << index.num_docs() << std::endl;
-            std::cout << "num terms " << index.num_terms() << std::endl;
-
-            std::vector<id_type> intersection(index.num_docs());  // at most
-            std::vector<id_type> term_ids;
-            term_ids.reserve(2);
-
-            // id_type i = 293;
-            // id_type j = 294;
-            // id_type i = 899;
-            // id_type j = 822;
-            id_type i = 2401599 - 1;
-            id_type j = 1752198 - 1;
-            term_ids.push_back(i + 1);
-            term_ids.push_back(j + 1);
-            // uint64_t size = index.intersect(term_ids, intersection);
-
-            {
-                std::cout << "intersection between " << i << " and " << j
-                          << " is: ";
-                uint32_t i = 0;
-                auto intersec_it = index.intersection_iterator(term_ids);
-                while (intersec_it.has_next()) {
-                    id_type doc_id = *intersec_it;
-                    std::cout << doc_id << " ";
-                    ++i;
-                    ++intersec_it;
-                }
-                std::cout << std::endl;
-            }
-
-            std::vector<id_type> a;
-            {
-                auto it = index.iterator(i);
-                a.resize(it.size());
-                for (uint32_t i = 0; i != a.size(); ++i) {
-                    a[i] = it.access(i);
-                }
-            }
-
-            std::vector<id_type> b;
-            {
-                auto it = index.iterator(j);
-                b.resize(it.size());
-                for (uint32_t i = 0; i != b.size(); ++i) {
-                    b[i] = it.access(i);
-                }
-            }
-
-            auto it = std::set_intersection(a.begin(), a.end(), b.begin(),
-                                            b.end(), intersection.begin());
-            intersection.resize(it - intersection.begin());
-            std::cout << "intersection between " << i << " and " << j
-                      << " is: ";
-            for (auto x : intersection) {
-                std::cout << x << " ";
-            }
-            std::cout << std::endl;
-
-            // for (uint32_t i = 1; i != index.num_terms() + 1; ++i) {
-            //     for (uint32_t j = i; j != index.num_terms() + 1; ++j) {
-            //         term_ids.clear();
-            //         term_ids.push_back(i);
-            //         term_ids.push_back(j);
-            //         uint64_t size = index.intersect(term_ids, intersection);
-            //         std::cout << "size of intersection between " << i << " and "
-            //                   << j << " is " << size << std::endl;
-            //     }
-            // }
-        }
-    }
-
-    return 0;
-}
diff --git a/test/test_locate_prefix.cpp b/test/test_locate_prefix.cpp
deleted file mode 100644
index fd3dcb4..0000000
--- a/test/test_locate_prefix.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-#include <iostream>
-
-#include "types.hpp"
-#include "../benchmark/benchmark_common.hpp"
-
-using namespace autocomplete;
-
-range locate_prefix(std::vector<std::string> const& strings,
-                    std::string const& p) {
-    auto comp_l = [](std::string const& l, std::string const& r) {
-        if (l.size() < r.size()) {
-            return strncmp(l.c_str(), r.c_str(), l.size()) <= 0;
-        }
-        return strcmp(l.c_str(), r.c_str()) < 0;
-    };
-
-    auto comp_r = [](std::string const& l, std::string const& r) {
-        if (l.size() < r.size()) {
-            return strncmp(l.c_str(), r.c_str(), l.size()) < 0;
-        }
-        return strcmp(l.c_str(), r.c_str()) < 0;
-    };
-
-    range r;
-    r.begin = std::distance(
-        strings.begin(),
-        std::lower_bound(strings.begin(), strings.end(), p, comp_l));
-    r.end = std::distance(
-        strings.begin(),
-        std::upper_bound(strings.begin(), strings.end(), p, comp_r));
-
-    return r;
-}
-
-template <typename Dictionary, typename Index>
-int test_locate_prefix(Dictionary const& dict, Index const& index,
-                       std::vector<std::string> const& queries,
-                       std::vector<std::string> const& strings) {
-    for (auto const& query : queries) {
-        std::string query_copy = query;
-        range expected = locate_prefix(strings, query);
-
-        // std::cout << "query: '" << query << "'" << std::endl;
-        completion_type prefix;
-        byte_range suffix;
-        parse(dict, query_copy, prefix, suffix);
-
-        // print_completion(prefix);
-        // std::cout << std::endl;
-        // print(suffix);
-        // std::cout << std::endl;
-
-        range suffix_lex_range = dict.locate_prefix(suffix);
-        suffix_lex_range.begin += 1;
-        suffix_lex_range.end += 1;
-        range got = index.locate_prefix(prefix, suffix_lex_range);
-
-        if ((got.begin != expected.begin) or (got.end != expected.end)) {
-            std::cout << "Error for query '" << query << "': ";
-            std::cout << "expected [" << expected.begin << "," << expected.end
-                      << ") but got [" << got.begin << "," << got.end << ")"
-                      << std::endl;
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> <max_num_queries>"
-                  << std::endl;
-        return 1;
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    fc_dictionary_type dict;
-    {
-        fc_dictionary_type::builder builder(params);
-        builder.build(dict);
-    }
-
-    std::vector<std::string> strings;
-
-    {
-        essentials::logger("loading all strings...");
-        std::string line;
-        strings.reserve(params.num_completions);
-        std::ifstream input((params.collection_basename).c_str(),
-                            std::ios_base::in);
-        for (uint32_t i = 0; i != params.num_completions; ++i) {
-            if (!std::getline(input, line)) break;
-            auto s = line.substr(line.find(' ') + 1, line.size());
-            strings.push_back(s);
-        }
-        input.close();
-        essentials::logger("loaded " + std::to_string(strings.size()) +
-                           " strings");
-    }
-
-    uint32_t max_num_queries = std::atoi(argv[2]);
-    std::vector<std::string> queries;
-    essentials::logger("loading queries...");
-    uint32_t num_queries =
-        load_queries(queries, max_num_queries, true, std::cin);
-    essentials::logger("loaded " + std::to_string(num_queries) + " queries");
-
-    {
-        // typedef uint64_completion_trie completion_trie_type;
-        typedef ef_completion_trie completion_trie_type;
-
-        completion_trie_type index;
-        {
-            completion_trie_type::builder builder(params);
-            builder.build(index);
-        }
-        essentials::logger("testing locate_prefix() for completion_trie...");
-        int ret = test_locate_prefix(dict, index, queries, strings);
-        if (ret) return 1;
-        essentials::logger("it's all good");
-    }
-
-    {
-        integer_fc_dictionary_type index;
-        {
-            integer_fc_dictionary_type::builder builder(params);
-            builder.build(index);
-        }
-        essentials::logger(
-            "testing locate_prefix() for integer_fc_dictionary...");
-        int ret = test_locate_prefix(dict, index, queries, strings);
-        if (ret) return 1;
-        essentials::logger("it's all good");
-    }
-
-    return 0;
-}
diff --git a/test/test_unsorted_list.cpp b/test/test_unsorted_list.cpp
deleted file mode 100644
index 9b9b000..0000000
--- a/test/test_unsorted_list.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <iostream>
-#include <algorithm>
-
-#include "types.hpp"
-
-using namespace autocomplete;
-
-static const uint32_t max_k = 15;
-static const uint32_t k = 10;
-static_assert(k <= max_k, "k must be less than max allowed");
-static const uint32_t num_queries = 10000;
-
-std::vector<id_type> naive_topk(std::vector<id_type> const& input, range r,
-                                uint32_t k) {
-    uint32_t range_len = r.end - r.begin;
-    std::vector<id_type> topk(range_len);
-    for (uint32_t i = 0; i != range_len; ++i) {
-        topk[i] = input[r.begin + i];
-    }
-    std::sort(topk.begin(), topk.begin() + range_len);
-    topk.resize(std::min(k, range_len));
-    return topk;
-}
-
-std::vector<range> gen_random_queries(uint32_t num_queries,
-                                      uint32_t max_range_len) {
-    std::vector<range> queries;
-    queries.reserve(num_queries);
-    essentials::uniform_int_rng<uint32_t> random(0, max_range_len);
-    for (uint32_t i = 0; i != num_queries; ++i) {
-        uint32_t x = random.gen();
-        uint32_t y = random.gen();
-        range r;
-        if (y > x) {
-            r = {x, y};
-        } else {
-            r = {y, x};
-        }
-        queries.push_back(r);
-    }
-    return queries;
-}
-
-int main(int argc, char** argv) {
-    int mandatory = 2;
-    if (argc < mandatory) {
-        std::cout << argv[0] << " <collection_basename> [-o output_filename]"
-                  << std::endl;
-        return 1;
-    }
-
-    char const* output_filename = nullptr;
-
-    for (int i = mandatory; i != argc; ++i) {
-        if (std::string(argv[i]) == "-o") {
-            ++i;
-            output_filename = argv[i];
-        }
-    }
-
-    parameters params;
-    params.collection_basename = argv[1];
-    params.load();
-
-    std::vector<id_type> doc_ids;
-
-    {
-        // build and write
-        doc_ids.reserve(params.num_completions);
-        std::ifstream input(params.collection_basename + ".mapped",
-                            std::ios_base::in);
-        if (!input.good()) {
-            throw std::runtime_error("File not found");
-        }
-        completion_iterator it(params, input);
-        while (input) {
-            auto const& record = *it;
-            doc_ids.push_back(record.doc_id);
-            ++it;
-        }
-        input.close();
-
-        {
-            // must have all ids from 0 to doc_ids.size() - 1
-            std::vector<id_type> tmp = doc_ids;
-            std::sort(tmp.begin(), tmp.end());
-            for (id_type id = 0; id != doc_ids.size(); ++id) {
-                if (tmp[id] != id) {
-                    std::cout << "Error: id " << id << " not found"
-                              << std::endl;
-                    return 1;
-                }
-            }
-        }
-
-        succinct_rmq list;
-        list.build(doc_ids);
-        assert(list.size() == doc_ids.size());
-        std::cout << "using " << list.bytes() << " bytes" << std::endl;
-
-        if (output_filename) {
-            // essentials::print_size(list);
-            essentials::logger("saving data structure to disk...");
-            essentials::save(list, output_filename);
-            essentials::logger("DONE");
-        }
-    }
-
-    {
-        if (output_filename) {
-            succinct_rmq list;
-            essentials::logger("loading data structure from disk...");
-            essentials::load(list, output_filename);
-            essentials::logger("DONE");
-
-            std::cout << "using " << list.bytes() << " bytes" << std::endl;
-
-            std::vector<id_type> topk(max_k);
-            auto queries = gen_random_queries(num_queries, doc_ids.size());
-            std::cout << "testing top-" << k << " " << num_queries
-                      << " random queries..." << std::endl;
-
-            for (auto q : queries) {
-                auto expected = naive_topk(doc_ids, q, k);
-                uint32_t num_elements = list.topk(q, k, topk);
-
-                if (expected.size() != num_elements) {
-                    std::cout << "Error: expected " << expected.size()
-                              << " topk elements but got " << num_elements
-                              << std::endl;
-                    return 1;
-                }
-
-                for (uint32_t i = 0; i != num_elements; ++i) {
-                    if (topk[i] != expected[i]) {
-                        std::cout << "Error: expected " << expected[i]
-                                  << " but got " << topk[i] << std::endl;
-                        return 1;
-                    }
-                }
-            }
-
-            std::cout << "it's all good" << std::endl;
-        }
-    }
-
-    return 0;
-}
diff --git a/test_data/partition_queries_by_length.py b/test_data/partition_queries_by_length.py
deleted file mode 100644
index f9cb561..0000000
--- a/test_data/partition_queries_by_length.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import sys
-import numpy as np
-
-input_filename = sys.argv[1]
-
-num_shards = 7
-files = [open(input_filename + ".length=" + str(i), "w") for i in range(1,num_shards + 1)]
-all_others = open(input_filename + ".length=" + str(num_shards + 1) + "+", "w")
-
-lines = 0
-with open(input_filename, 'r') as f:
-    for line in f:
-        x = line.rstrip('\n').split()
-        l = len(x) - 1
-
-        if l > num_shards:
-            all_others.write(line)
-        else:
-            files[l - 1].write(line)
-
-        lines += 1
-        if lines % 1000000 == 0:
-            print("processed " + str(lines) + " lines")
-
-for f in files:
-    f.close()
-all_others.close()