-
Notifications
You must be signed in to change notification settings - Fork 5
Using as a Library
rookiemann edited this page Apr 10, 2026
·
1 revision
from multi_turboquant import CacheConfig, CacheMethod, get_preset
from multi_turboquant.integration import get_llamacpp_command
from multi_turboquant.hardware import detect_gpus
# Detect hardware
# NOTE(review): `gpus` is not used by the rest of this snippet.
gpus = detect_gpus()

# Get a preset (or let the user choose)
config = get_preset("balanced")

# Generate the launch command
cmd = get_llamacpp_command(
    config,
    model_path="/opt/models/model.gguf",
    port=8080,
)

# Start the server with subprocess
import subprocess

proc = subprocess.Popen(cmd)

from multi_turboquant.integration import BridgeAdapter
# Build the adapter from the chosen config and ask it for the UI choices.
adapter = BridgeAdapter(config)
options = adapter.get_ui_options()
# Returns list of dicts with value, label, group, needs_calibration, description
# Ready to populate a <select> element

from multi_turboquant import plan_agents
from multi_turboquant.hardware import detect_gpus
def on_config_change(model_size, agents, context):
    """Called when user adjusts sliders in the UI.

    Detects the available GPUs, converts each one with ``to_planner_dict()``,
    and passes them to ``plan_agents`` together with the requested settings.

    Args:
        model_size: Forwarded to ``plan_agents`` as ``model_params_b``.
        agents: Forwarded to ``plan_agents`` as ``desired_agents``.
        context: Forwarded to ``plan_agents`` as ``desired_context``.

    Returns:
        The planner result converted with ``to_dict()``.
    """
    gpus = [g.to_planner_dict() for g in detect_gpus()]
    result = plan_agents(
        gpus=gpus,
        model_params_b=model_size,
        desired_agents=agents,
        desired_context=context,
    )
    return result.to_dict()  # JSON-serializable for the frontend


from multi_turboquant.calibration import auto_calibrate
# Call before starting inference with TurboQuant
results = auto_calibrate(config, "/opt/models/MyModel")
# Only report when the call returned something truthy.
if results:
    print(f"Generated: {results}")

# See multi_turboquant/integration/example_app_integration.py for a complete integration example showing UI dropdown, capacity planning, auto-calibration, and command generation.
Getting Started
Methods
Configuration
Planning
Integration
Reference