its_hub is a Python library for inference-time scaling of LLMs, focusing on mathematical reasoning tasks.
For comprehensive documentation, including installation guides, tutorials, and API reference, visit:
https://ai-innovation.team/its_hub
from its_hub.utils import SAL_STEP_BY_STEP_SYSTEM_PROMPT
from its_hub.lms import OpenAICompatibleLanguageModel, StepGeneration
from its_hub.algorithms import ParticleFiltering
from its_hub.integration.reward_hub import LocalVllmProcessRewardModel
# Initialize the language model (requires a running vLLM server at the endpoint below)
lm = OpenAICompatibleLanguageModel(
endpoint="http://localhost:8000/v1",
api_key="NO_API_KEY",
model_name="Qwen/Qwen2.5-Math-1.5B-Instruct",
system_prompt=SAL_STEP_BY_STEP_SYSTEM_PROMPT,
)
# Set up inference-time scaling
sg = StepGeneration("\n\n", 32, r"\boxed")
prm = LocalVllmProcessRewardModel(
model_name="Qwen/Qwen2.5-Math-PRM-7B",
device="cuda:0",
aggregation_method="prod"
)
scaling_alg = ParticleFiltering(sg, prm)
# Solve with inference-time scaling
result = scaling_alg.infer(lm, "Solve x^2 + 5x + 6 = 0", budget=8)
# Production: install the released package
pip install its_hub
# Development: editable install from a source checkout, including the dev extras
git clone https://github.com/Red-Hat-AI-Innovation-Team/its_hub.git
cd its_hub
pip install -e ".[dev]"
- 🔬 Multiple Algorithms: Particle Filtering, Best-of-N, Beam Search, Self-Consistency
- 🚀 OpenAI-Compatible API: Easy integration with existing applications
- 🧮 Math-Optimized: Built for mathematical reasoning with specialized prompts
- 📊 Benchmarking Tools: Compare algorithms on MATH500 and AIME-2024 datasets
- ⚡ Async Support: Concurrent generation with limits and error handling
# Testing: set up a development install, then run the test suite with pytest
git clone https://github.com/Red-Hat-AI-Innovation-Team/its_hub.git
cd its_hub
pip install -e ".[dev]"
pytest tests
For detailed documentation, visit: https://ai-innovation.team/its_hub