Skip to content

Configuration

rookiemann edited this page Apr 10, 2026 · 1 revision

Configuration

CacheConfig

CacheConfig is the central configuration object:

from multi_turboquant import CacheConfig, CacheMethod

config = CacheConfig(
    # Compression methods
    k_method=CacheMethod.TURBO3,      # key cache compression
    v_method=CacheMethod.TURBO3,      # value cache compression

    # TriAttention (composable with any method)
    triattention_enabled=False,
    triattention_budget=4096,          # max tokens to keep
    triattention_window=512,           # recent tokens never evicted

    # Calibration file paths
    turboquant_metadata_path=None,     # path to turboquant_kv.json
    triattention_stats_path=None,      # path to triattention_stats.pt

    # Model info (for VRAM estimation)
    head_dim=128,
    num_kv_heads=8,
    num_layers=32,
)

Available cache methods

from multi_turboquant import CacheMethod

# TurboQuant (Walsh-Hadamard, requires calibration)
CacheMethod.TURBO2      # 2.25-bit, 7.1x compression
CacheMethod.TURBO3      # 3.25-bit, 4.9x compression
CacheMethod.TURBO4      # 4.25-bit, 3.8x compression

# TCQ (Trellis Coded, requires calibration)
CacheMethod.TURBO2_TCQ  # 2.25-bit, better quality than turbo2
CacheMethod.TURBO3_TCQ  # 3.25-bit, better quality than turbo3

# IsoQuant (quaternion rotation, NO calibration)
CacheMethod.ISO3        # 3.25-bit
CacheMethod.ISO4        # 4.25-bit

# PlanarQuant (Givens rotation, NO calibration)
CacheMethod.PLANAR3     # 3.25-bit
CacheMethod.PLANAR4     # 4.25-bit

# TriAttention (token eviction)
CacheMethod.TRIATTENTION

# Baselines
CacheMethod.FP16        # no compression
CacheMethod.Q8_0        # 8-bit

Config properties

config.is_symmetric       # True if K and V use the same method
config.is_k_only          # True if only K is compressed
config.needs_calibration  # True if any method needs calibration files
config.k_compression      # compression ratio for K cache
config.v_compression      # compression ratio for V cache
config.estimate_kv_bytes(context_length=8192)  # estimated total KV cache size in bytes
config.validate()         # returns a list of warnings

Clone this wiki locally