From 955fd5ee2d46894a0149c66e6ffe4edcd5bc4b11 Mon Sep 17 00:00:00 2001 From: Nikola Katsarov Date: Mon, 4 May 2026 15:05:26 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20credit=20pricing=20overhaul=20=E2=80=94?= =?UTF-8?q?=20per-model=20rates,=20margin,=20snapshots,=20max=20cap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructures llm_pricing.php from flat input/output ints to per-model real rates ($USD/Mtok) with cache_read / cache_write_5m / cache_write_1h tiers, context_window, last_verified_at, and tier-based reservation multipliers (default 1.5 / nano 1.2 / heavy 2.0). Real prices now reflect Jan 2026 reality: - Opus 4.7 corrected to $5/$25 (was $15/$75 — 3× overpriced) - Sonnet 4.6 confirmed $3/$15 - Haiku 4.5 corrected to $1/$5 (was $0.80/$4) - GPT-5 added at $1.25/$10 - GPT-5 nano added at $0.05/$0.40 (was missing entirely) - Gemini 2.5 Flash refreshed to $0.30/$2.50 (was $0.075/$0.30) CostCalculator gains calculatePlatformCredits() returning structured {platform_credits, raw_cost_usd, billable_cost_usd, margin_applied, model_pricing} with cache_strategy parameter and per-call margin/max-cap overrides. estimatePlatformCredits() uses model.tier reservation multiplier. calculateCost() / estimateCost() back-compat preserved (BudgetEnforcement, PrismAiGateway, SkillCostCalculator, ContextHealthService unchanged). EnforceMaxCreditsPerCallAction wires into BudgetEnforcement middleware to throw InsufficientBudgetException BEFORE the Prism call when the estimated cost would exceed the team's per-call credit cap — protects against runaway Opus calls. Team gains effectiveMarginMultiplier() + effectiveMaxCreditsPerCall() with config-default fallback (per-team override lives in cloud). TeamSettingsPage adds "Billing & Per-Call Limits" section gated on Schema::hasColumn so community edition gracefully hides cloud-only fields. 
Non-LLM cost categories (compute / outbound / storage / tool) scaffolded in config at usd_per_unit=0 — threaded through but not deducted yet. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../EnforceMaxCreditsPerCallAction.php | 63 ++++ app/Domain/Budget/Services/CostCalculator.php | 212 +++++++++++- app/Domain/Shared/Models/Team.php | 21 ++ .../AI/Middleware/BudgetEnforcement.php | 10 + app/Livewire/Teams/TeamSettingsPage.php | 80 +++++ config/llm_pricing.php | 327 ++++++++++++++---- .../CostCalculatorPlatformCreditsTest.php | 208 +++++++++++ 7 files changed, 826 insertions(+), 95 deletions(-) create mode 100644 app/Domain/Budget/Actions/EnforceMaxCreditsPerCallAction.php create mode 100644 tests/Unit/Domain/Budget/CostCalculatorPlatformCreditsTest.php diff --git a/app/Domain/Budget/Actions/EnforceMaxCreditsPerCallAction.php b/app/Domain/Budget/Actions/EnforceMaxCreditsPerCallAction.php new file mode 100644 index 00000000..6c559d82 --- /dev/null +++ b/app/Domain/Budget/Actions/EnforceMaxCreditsPerCallAction.php @@ -0,0 +1,63 @@ +find($teamId); + if (! $team) { + return; + } + + $cap = $team->effectiveMaxCreditsPerCall(); + if ($cap === null) { + return; + } + + $marginOverride = $team->effectiveMarginMultiplier(); + + $estimate = $this->costCalculator->estimatePlatformCredits( + provider: $provider, + model: $model, + estimatedInputTokens: $estimatedInputTokens, + maxOutputTokens: $maxOutputTokens, + cacheStrategy: $cacheStrategy, + marginOverride: $marginOverride, + ); + + if ($estimate > $cap) { + throw new InsufficientBudgetException( + "Estimated {$estimate} platform_credits exceeds team cap of {$cap}. ". + 'Increase max_credits_per_call in team settings or use a cheaper model.' 
+ ); + } + } +} diff --git a/app/Domain/Budget/Services/CostCalculator.php b/app/Domain/Budget/Services/CostCalculator.php index cc81e141..cd316365 100644 --- a/app/Domain/Budget/Services/CostCalculator.php +++ b/app/Domain/Budget/Services/CostCalculator.php @@ -7,39 +7,154 @@ use App\Infrastructure\AI\Enums\BudgetPressureLevel; use App\Models\GlobalSetting; use Illuminate\Support\Facades\Cache; +use Illuminate\Support\Facades\Log; class CostCalculator { - public function calculateCost(string $provider, string $model, int $inputTokens, int $outputTokens): int - { - $pricing = $this->getPricing($provider, $model); + public const CACHE_STRATEGY_NONE = 'none'; + + public const CACHE_STRATEGY_5M = 'ephemeral_5m'; + + public const CACHE_STRATEGY_1H = 'ephemeral_1h'; - if (! $pricing) { + /** + * Back-compat: existing callers (BudgetEnforcement, PrismAiGateway, SkillCostCalculator) + * read cost_credits where 1 credit = $0.001 USD. + */ + public function calculateCost( + string $provider, + string $model, + int $inputTokens, + int $outputTokens, + int $cachedInputTokens = 0, + ?string $cacheStrategy = null, + ): int { + $rawCostUsd = $this->rawCostUsd($provider, $model, $inputTokens, $outputTokens, $cachedInputTokens, $cacheStrategy); + + if ($rawCostUsd <= 0.0) { return 0; } - $inputCost = (int) ceil(($inputTokens / 1000) * $pricing['input']); - $outputCost = (int) ceil(($outputTokens / 1000) * $pricing['output']); + $creditValueUsd = (float) config('llm_pricing.credit_value_usd', 0.001); - return $inputCost + $outputCost; + return (int) ceil($rawCostUsd / $creditValueUsd); } + /** + * Back-compat: existing reservation flow uses cost_credits. + */ public function estimateCost(string $provider, string $model, int $maxTokens): int { $pricing = $this->getPricing($provider, $model); - if (! 
$pricing) { + if ($pricing === null) { return 0; } - // Estimate: assume ~500 input tokens + full maxTokens output $estimatedInputTokens = 500; - $inputCost = (int) ceil(($estimatedInputTokens / 1000) * $pricing['input']); - $outputCost = (int) ceil(($maxTokens / 1000) * $pricing['output']); + $multiplier = $this->reservationMultiplierFor($pricing); + + $rawCostUsd = $this->rawCostUsd($provider, $model, $estimatedInputTokens, $maxTokens, 0, null); + + if ($rawCostUsd <= 0.0) { + return 0; + } - $multiplier = config('llm_pricing.reservation_multiplier', 1.5); + $creditValueUsd = (float) config('llm_pricing.credit_value_usd', 0.001); - return (int) ceil(($inputCost + $outputCost) * $multiplier); + return (int) ceil(($rawCostUsd / $creditValueUsd) * $multiplier); + } + + /** + * NEW — primary entry point for platform_credits deduction. + * + * @return array{ + * platform_credits:int, + * raw_cost_usd:float, + * billable_cost_usd:float, + * margin_applied:float, + * model_pricing:array|null + * } + */ + public function calculatePlatformCredits( + string $provider, + string $model, + int $inputTokens, + int $outputTokens, + int $cachedInputTokens = 0, + ?string $cacheStrategy = null, + ?float $marginOverride = null, + ?int $maxCapOverride = null, + ): array { + $pricing = $this->getPricing($provider, $model); + $rawCostUsd = $this->rawCostUsd($provider, $model, $inputTokens, $outputTokens, $cachedInputTokens, $cacheStrategy); + + $margin = $marginOverride ?? (float) config('llm_pricing.margin_multiplier', 1.30); + $billableCostUsd = $rawCostUsd * $margin; + + $usdPerCredit = (float) config('llm_pricing.usd_per_credit', 0.01); + $minCredits = (int) config('llm_pricing.min_credits_per_call', 1); + $configMax = config('llm_pricing.max_credits_per_call'); + $maxCap = $maxCapOverride ?? ($configMax !== null ? 
(int) $configMax : null); + + if ($rawCostUsd <= 0.0) { + return [ + 'platform_credits' => 0, + 'raw_cost_usd' => 0.0, + 'billable_cost_usd' => 0.0, + 'margin_applied' => $margin, + 'model_pricing' => $pricing, + ]; + } + + $rawCredits = (int) ceil($billableCostUsd / $usdPerCredit); + $platformCredits = max($minCredits, $rawCredits); + if ($maxCap !== null && $maxCap > 0) { + $platformCredits = min($platformCredits, $maxCap); + $platformCredits = max($minCredits, $platformCredits); + } + + return [ + 'platform_credits' => $platformCredits, + 'raw_cost_usd' => $rawCostUsd, + 'billable_cost_usd' => $billableCostUsd, + 'margin_applied' => $margin, + 'model_pricing' => $pricing, + ]; + } + + /** + * Pre-call platform-credit estimate for max-cap enforcement and reservation. + * Uses tier-specific reservation multiplier. + */ + public function estimatePlatformCredits( + string $provider, + string $model, + int $estimatedInputTokens, + int $maxOutputTokens, + ?string $cacheStrategy = null, + ?float $marginOverride = null, + ): int { + $pricing = $this->getPricing($provider, $model); + + if ($pricing === null) { + return (int) config('llm_pricing.min_credits_per_call', 1); + } + + $multiplier = $this->reservationMultiplierFor($pricing); + + $result = $this->calculatePlatformCredits( + provider: $provider, + model: $model, + inputTokens: $estimatedInputTokens, + outputTokens: $maxOutputTokens, + cachedInputTokens: 0, + cacheStrategy: $cacheStrategy, + marginOverride: $marginOverride, + maxCapOverride: null, + ); + + return (int) ceil($result['platform_credits'] * $multiplier); } public function getBudgetPressureLevel(string $teamId): BudgetPressureLevel @@ -58,25 +173,21 @@ public function getBudgetPressureLevel(string $teamId): BudgetPressureLevel private function calculateBudgetPressure(string $teamId): BudgetPressureLevel { - // Get team's latest balance from the most recent ledger entry $latestEntry = CreditLedger::withoutGlobalScopes() ->where('team_id', $teamId) 
->orderByDesc('created_at') ->orderByDesc('id') ->first(['balance_after']); - // No ledger entries at all — team has never had credits, no pressure if (! $latestEntry) { return BudgetPressureLevel::None; } - // Get total purchased/refunded credits (the team's ceiling) $totalBudget = (int) CreditLedger::withoutGlobalScopes() ->where('team_id', $teamId) ->whereIn('type', [LedgerType::Purchase->value, LedgerType::Refund->value]) ->sum('amount'); - // No purchased credits — self-hosted/community install, no pressure if ($totalBudget <= 0) { return BudgetPressureLevel::None; } @@ -105,11 +216,74 @@ private function calculateBudgetPressure(string $teamId): BudgetPressureLevel return BudgetPressureLevel::None; } + private function rawCostUsd( + string $provider, + string $model, + int $inputTokens, + int $outputTokens, + int $cachedInputTokens, + ?string $cacheStrategy, + ): float { + $pricing = $this->getPricing($provider, $model); + + if ($pricing === null) { + Log::debug('cost_calculator.unknown_model', ['provider' => $provider, 'model' => $model]); + + return 0.0; + } + + $inputRate = (float) ($pricing['input_usd_per_mtok'] ?? 0); + $outputRate = (float) ($pricing['output_usd_per_mtok'] ?? 0); + $cacheReadRate = (float) ($pricing['cache_read_usd_per_mtok'] ?? 
$inputRate); + + $cachedInputTokens = max(0, min($cachedInputTokens, $inputTokens)); + $uncachedInput = $inputTokens - $cachedInputTokens; + + $cost = ($uncachedInput / 1_000_000.0) * $inputRate + + ($cachedInputTokens / 1_000_000.0) * $cacheReadRate + + ($outputTokens / 1_000_000.0) * $outputRate; + + if ($cacheStrategy === self::CACHE_STRATEGY_5M + && isset($pricing['cache_write_5m_usd_per_mtok'])) { + $cost += ($inputTokens / 1_000_000.0) * (float) $pricing['cache_write_5m_usd_per_mtok']; + } elseif ($cacheStrategy === self::CACHE_STRATEGY_1H + && isset($pricing['cache_write_1h_usd_per_mtok'])) { + $cost += ($inputTokens / 1_000_000.0) * (float) $pricing['cache_write_1h_usd_per_mtok']; + } + + return $cost; + } + /** - * @return array{input: int, output: int}|null + * @param array $pricing + */ + private function reservationMultiplierFor(array $pricing): float + { + $tier = (string) ($pricing['tier'] ?? 'default'); + $tiered = config("llm_pricing.reservation_multipliers.{$tier}"); + + if ($tiered !== null) { + return (float) $tiered; + } + + return (float) config('llm_pricing.reservation_multiplier', 1.5); + } + + /** + * @return array|null */ private function getPricing(string $provider, string $model): ?array { - return config("llm_pricing.providers.{$provider}.{$model}"); + $direct = config("llm_pricing.providers.{$provider}.{$model}"); + if ($direct !== null) { + return $direct; + } + + $wildcard = config("llm_pricing.providers.{$provider}.*"); + if ($wildcard !== null) { + return $wildcard; + } + + return null; } } diff --git a/app/Domain/Shared/Models/Team.php b/app/Domain/Shared/Models/Team.php index 9c51dad3..bfd30bca 100644 --- a/app/Domain/Shared/Models/Team.php +++ b/app/Domain/Shared/Models/Team.php @@ -19,6 +19,9 @@ * @property array|null $settings * @property string|null $plan * @property array|null $custom_limits + * @property string|null $sub_program_slug + * @property float|null $credit_margin_multiplier + * @property int|null 
$max_credits_per_call */ class Team extends Model { @@ -131,6 +134,24 @@ public function hasFeature(string $feature): bool return true; } + /** + * Community edition has no per-team override — falls through to config. + */ + public function effectiveMarginMultiplier(): float + { + return (float) config('llm_pricing.margin_multiplier', 1.30); + } + + /** + * Community edition has no per-team cap — falls through to config. + */ + public function effectiveMaxCreditsPerCall(): ?int + { + $configMax = config('llm_pricing.max_credits_per_call'); + + return $configMax !== null ? (int) $configMax : null; + } + /** * Look up the owner_id for a team without instantiating the model and * without relying on TeamScope. Used as a userId fallback in actions diff --git a/app/Infrastructure/AI/Middleware/BudgetEnforcement.php b/app/Infrastructure/AI/Middleware/BudgetEnforcement.php index c625d182..e25f301f 100644 --- a/app/Infrastructure/AI/Middleware/BudgetEnforcement.php +++ b/app/Infrastructure/AI/Middleware/BudgetEnforcement.php @@ -2,6 +2,7 @@ namespace App\Infrastructure\AI\Middleware; +use App\Domain\Budget\Actions\EnforceMaxCreditsPerCallAction; use App\Domain\Budget\Actions\ReserveBudgetAction; use App\Domain\Budget\Actions\SettleBudgetAction; use App\Domain\Budget\Exceptions\InsufficientBudgetException; @@ -17,6 +18,7 @@ public function __construct( private readonly CostCalculator $costCalculator, private readonly ReserveBudgetAction $reserveBudget, private readonly SettleBudgetAction $settleBudget, + private readonly EnforceMaxCreditsPerCallAction $enforceMaxCap, ) {} public function handle(AiRequestDTO $request, Closure $next): AiResponseDTO @@ -25,6 +27,14 @@ public function handle(AiRequestDTO $request, Closure $next): AiResponseDTO return $next($request); } + // Pre-call max-credits-per-call enforcement (throws before any LLM hit). + $this->enforceMaxCap->execute( + teamId: $request->teamId ?? 
'', + provider: $request->provider, + model: $request->model, + maxOutputTokens: $request->maxTokens, + ); + $estimatedCost = $this->costCalculator->estimateCost( provider: $request->provider, model: $request->model, diff --git a/app/Livewire/Teams/TeamSettingsPage.php b/app/Livewire/Teams/TeamSettingsPage.php index 06672b0d..dcafd6bf 100644 --- a/app/Livewire/Teams/TeamSettingsPage.php +++ b/app/Livewire/Teams/TeamSettingsPage.php @@ -65,6 +65,11 @@ class TeamSettingsPage extends Component // Approval settings public int $approvalTimeoutHours = 48; + // Billing & limits + public string $maxCreditsPerCall = ''; + + public string $creditMarginMultiplier = ''; + // Bridge routing public string $bridgeRoutingMode = 'auto'; @@ -164,6 +169,18 @@ public function mount(): void $this->approvalTimeoutHours = (int) ($settings['approval_timeout_hours'] ?? GlobalSetting::get('approval_timeout_hours', 48)); $this->chatbotEnabled = (bool) ($settings['chatbot_enabled'] ?? false); + // Billing & limits — only meaningful in cloud builds (columns may not exist in community). + if (\Illuminate\Support\Facades\Schema::hasColumn('teams', 'max_credits_per_call')) { + $this->maxCreditsPerCall = $team->max_credits_per_call !== null + ? (string) $team->max_credits_per_call + : ''; + } + if (\Illuminate\Support\Facades\Schema::hasColumn('teams', 'credit_margin_multiplier')) { + $this->creditMarginMultiplier = $team->credit_margin_multiplier !== null + ? (string) $team->credit_margin_multiplier + : ''; + } + // AI Features $this->autoSkillProposeEnabled = (bool) ($settings['auto_skill_propose_enabled'] ?? config('skills.auto_propose.enabled', true)); $this->autoSkillProposeMinStages = (int) ($settings['auto_skill_propose_min_stages'] ?? 
config('skills.auto_propose.min_stages', 5)); @@ -459,6 +476,69 @@ public function saveSessionTtl(): void session()->flash('message', 'Session TTL saved.'); } + public function saveCreditsLimits(): void + { + $this->authorize('manage-team', auth()->user()->currentTeam); + + if (! \Illuminate\Support\Facades\Schema::hasColumn('teams', 'max_credits_per_call')) { + session()->flash('error', 'Per-call credit limits are not available in this build.'); + + return; + } + + $this->validate([ + 'maxCreditsPerCall' => 'nullable|string', + ]); + + $value = trim($this->maxCreditsPerCall); + $intValue = $value === '' ? null : (int) $value; + + if ($intValue !== null && ($intValue < 1 || $intValue > 100_000)) { + $this->addError('maxCreditsPerCall', 'Must be between 1 and 100,000 (or empty for unlimited).'); + + return; + } + + auth()->user()->currentTeam->update([ + 'max_credits_per_call' => $intValue, + ]); + + session()->flash('message', 'Per-call credit limit saved.'); + } + + public function saveCreditMargin(): void + { + $user = auth()->user(); + if (! $user || ! ($user->is_super_admin ?? false)) { + abort(403, 'Super-admin role required to change margin multiplier.'); + } + + if (! \Illuminate\Support\Facades\Schema::hasColumn('teams', 'credit_margin_multiplier')) { + session()->flash('error', 'Margin override is not available in this build.'); + + return; + } + + $this->validate([ + 'creditMarginMultiplier' => 'nullable|string', + ]); + + $value = trim($this->creditMarginMultiplier); + $floatValue = $value === '' ? 
null : (float) $value; + + if ($floatValue !== null && ($floatValue < 0.5 || $floatValue > 3.0)) { + $this->addError('creditMarginMultiplier', 'Must be between 0.5 and 3.0 (or empty for default).'); + + return; + } + + $user->currentTeam->update([ + 'credit_margin_multiplier' => $floatValue, + ]); + + session()->flash('message', 'Credit margin multiplier saved.'); + } + public function saveApprovalSettings(): void { $this->validate([ diff --git a/config/llm_pricing.php b/config/llm_pricing.php index 29e28e27..60f13cb6 100644 --- a/config/llm_pricing.php +++ b/config/llm_pricing.php @@ -1,118 +1,267 @@ 0.001, + // 1 platform_credit price (customer-facing). Customer pays FleetQ per-credit at this rate. + 'usd_per_credit' => (float) env('FLEETQ_USD_PER_CREDIT', 0.01), + + // Default margin layer applied between raw provider cost and billable platform_credits. + // Per-team override via teams.credit_margin_multiplier. Per-call override via $marginOverride arg. + 'margin_multiplier' => (float) env('FLEETQ_MARGIN_MULTIPLIER', 1.30), + + // Min platform_credits per call — protects margin on nano models (€0.0005 raw → 1 credit charged). + 'min_credits_per_call' => (int) env('FLEETQ_MIN_CREDITS_PER_CALL', 1), + + // Optional safety cap. Per-team override via teams.max_credits_per_call. null = uncapped. + 'max_credits_per_call' => env('FLEETQ_MAX_CREDITS_PER_CALL') !== null + ? (int) env('FLEETQ_MAX_CREDITS_PER_CALL') + : null, + + 'usd_to_eur_rate' => (float) env('FLEETQ_USD_TO_EUR', 0.91), + + 'last_updated_at' => '2026-05-04', + + /* + |-------------------------------------------------------------------------- + | Reservation multipliers (by tier) + |-------------------------------------------------------------------------- + | + | Used by CostCalculator::estimatePlatformCredits() and BudgetEnforcement + | reservation. Different model classes have different output-length variance: + | nano calls converge tightly (1.2x); reasoning/heavy calls vary widely (2.0x). 
+ | + */ + 'reservation_multipliers' => [ + 'default' => (float) env('FLEETQ_RESERVATION_DEFAULT', 1.5), + 'nano' => (float) env('FLEETQ_RESERVATION_NANO', 1.2), + 'heavy' => (float) env('FLEETQ_RESERVATION_HEAVY', 2.0), + ], + + /* + |-------------------------------------------------------------------------- + | Per-provider / per-model pricing + |-------------------------------------------------------------------------- + | + | Schema (all rates in $USD/Mtok unless noted): + | tier: 'default'|'nano'|'heavy' — selects reservation multiplier + | input_usd_per_mtok: standard input + | output_usd_per_mtok: generation output + | cache_read_usd_per_mtok: Anthropic prompt-caching read (reused tokens) + | cache_write_5m_usd_per_mtok: ephemeral 5-minute cache write surcharge + | cache_write_1h_usd_per_mtok: ephemeral 1-hour cache write surcharge + | context_window: tokens (used by ContextHealthService) + | last_verified_at: ISO date — staleness alerts fire if older than 90 days (P1) + | source_url: where rates were sourced (audit trail) + | + */ 'providers' => [ 'anthropic' => [ + 'claude-opus-4-7' => [ + 'tier' => 'heavy', + 'input_usd_per_mtok' => 5.00, + 'output_usd_per_mtok' => 25.00, + 'cache_read_usd_per_mtok' => 0.50, + 'cache_write_5m_usd_per_mtok' => 6.25, + 'cache_write_1h_usd_per_mtok' => 10.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', + ], + 'claude-opus-4-6' => [ + 'tier' => 'heavy', + 'input_usd_per_mtok' => 15.00, + 'output_usd_per_mtok' => 75.00, + 'cache_read_usd_per_mtok' => 1.50, + 'cache_write_5m_usd_per_mtok' => 18.75, + 'cache_write_1h_usd_per_mtok' => 30.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', + ], + 'claude-sonnet-4-6' => [ + 'tier' => 'default', + 'input_usd_per_mtok' => 3.00, + 'output_usd_per_mtok' => 15.00, + 'cache_read_usd_per_mtok' => 
0.30, + 'cache_write_5m_usd_per_mtok' => 3.75, + 'cache_write_1h_usd_per_mtok' => 6.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', + ], + 'claude-sonnet-4-5' => [ + 'tier' => 'default', + 'input_usd_per_mtok' => 3.00, + 'output_usd_per_mtok' => 15.00, + 'cache_read_usd_per_mtok' => 0.30, + 'cache_write_5m_usd_per_mtok' => 3.75, + 'cache_write_1h_usd_per_mtok' => 6.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', + ], 'claude-sonnet-4-5-20250929' => [ - 'input' => 30, // $3.00/1M tokens - 'output' => 150, // $15.00/1M tokens + 'tier' => 'default', + 'input_usd_per_mtok' => 3.00, + 'output_usd_per_mtok' => 15.00, + 'cache_read_usd_per_mtok' => 0.30, + 'cache_write_5m_usd_per_mtok' => 3.75, + 'cache_write_1h_usd_per_mtok' => 6.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', ], - 'claude-haiku-4-5-20251001' => [ - 'input' => 8, // $0.80/1M tokens - 'output' => 40, // $4.00/1M tokens + 'claude-haiku-4-5' => [ + 'tier' => 'nano', + 'input_usd_per_mtok' => 1.00, + 'output_usd_per_mtok' => 5.00, + 'cache_read_usd_per_mtok' => 0.10, + 'cache_write_5m_usd_per_mtok' => 1.25, + 'cache_write_1h_usd_per_mtok' => 2.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://docs.claude.com/en/docs/about-claude/pricing', ], - 'claude-opus-4-6' => [ - 'input' => 150, // $15.00/1M tokens - 'output' => 750, // $75.00/1M tokens + 'claude-haiku-4-5-20251001' => [ + 'tier' => 'nano', + 'input_usd_per_mtok' => 1.00, + 'output_usd_per_mtok' => 5.00, + 'cache_read_usd_per_mtok' => 0.10, + 'cache_write_5m_usd_per_mtok' => 1.25, + 'cache_write_1h_usd_per_mtok' => 2.00, + 'context_window' => 200_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 
'https://docs.claude.com/en/docs/about-claude/pricing', ], ], 'openai' => [ + 'gpt-5' => [ + 'tier' => 'default', + 'input_usd_per_mtok' => 1.25, + 'output_usd_per_mtok' => 10.00, + 'cache_read_usd_per_mtok' => 0.13, + 'context_window' => 400_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://platform.openai.com/docs/pricing', + ], + 'gpt-5-nano' => [ + 'tier' => 'nano', + 'input_usd_per_mtok' => 0.05, + 'output_usd_per_mtok' => 0.40, + 'cache_read_usd_per_mtok' => 0.005, + 'context_window' => 128_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://platform.openai.com/docs/pricing', + ], 'gpt-4o' => [ - 'input' => 25, // $2.50/1M tokens - 'output' => 100, // $10.00/1M tokens + 'tier' => 'default', + 'input_usd_per_mtok' => 2.50, + 'output_usd_per_mtok' => 10.00, + 'cache_read_usd_per_mtok' => 1.25, + 'context_window' => 128_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://platform.openai.com/docs/pricing', ], 'gpt-4o-mini' => [ - 'input' => 2, // $0.15/1M tokens - 'output' => 6, // $0.60/1M tokens + 'tier' => 'nano', + 'input_usd_per_mtok' => 0.15, + 'output_usd_per_mtok' => 0.60, + 'cache_read_usd_per_mtok' => 0.075, + 'context_window' => 128_000, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://platform.openai.com/docs/pricing', ], ], 'google' => [ 'gemini-2.5-flash' => [ - 'input' => 1, // $0.075/1M tokens - 'output' => 3, // $0.30/1M tokens + 'tier' => 'nano', + 'input_usd_per_mtok' => 0.30, + 'output_usd_per_mtok' => 2.50, + 'context_window' => 1_048_576, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://ai.google.dev/pricing', ], 'gemini-2.5-pro' => [ - 'input' => 12, // $1.25/1M tokens - 'output' => 50, // $5.00/1M tokens + 'tier' => 'default', + 'input_usd_per_mtok' => 1.25, + 'output_usd_per_mtok' => 5.00, + 'context_window' => 1_048_576, + 'last_verified_at' => '2026-05-04', + 'source_url' => 'https://ai.google.dev/pricing', ], ], - // Portkey is a passthrough gateway 
— actual cost is tracked by Portkey. - // We store 0 credits locally to avoid double-billing. - 'portkey' => [ - '*' => ['input' => 0, 'output' => 0], - ], - - 'codex' => [ - 'gpt-5.3-codex' => ['input' => 0, 'output' => 0], - 'gpt-5.2-codex' => ['input' => 0, 'output' => 0], - 'gpt-5.1-codex-mini' => ['input' => 0, 'output' => 0], - ], - - 'claude-code' => [ - 'claude-sonnet-4-5' => ['input' => 0, 'output' => 0], - 'claude-opus-4-6' => ['input' => 0, 'output' => 0], - 'claude-haiku-4-5' => ['input' => 0, 'output' => 0], - ], - 'groq' => [ - 'llama-3.3-70b-versatile' => ['input' => 6, 'output' => 8], // $0.59/$0.79 per 1M - 'llama-3.1-8b-instant' => ['input' => 1, 'output' => 1], // $0.05/$0.08 per 1M - 'llama-4-scout-17b-16e' => ['input' => 1, 'output' => 3], // $0.11/$0.34 per 1M - 'gemma2-9b-it' => ['input' => 2, 'output' => 2], // $0.20/$0.20 per 1M - 'qwen-qwq-32b' => ['input' => 3, 'output' => 4], // $0.29/$0.39 per 1M - 'mixtral-8x7b-32768' => ['input' => 2, 'output' => 2], // $0.24/$0.24 per 1M + 'llama-3.3-70b-versatile' => ['tier' => 'default', 'input_usd_per_mtok' => 0.59, 'output_usd_per_mtok' => 0.79, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'llama-3.1-8b-instant' => ['tier' => 'nano', 'input_usd_per_mtok' => 0.05, 'output_usd_per_mtok' => 0.08, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'llama-4-scout-17b-16e' => ['tier' => 'default', 'input_usd_per_mtok' => 0.11, 'output_usd_per_mtok' => 0.34, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'gemma2-9b-it' => ['tier' => 'nano', 'input_usd_per_mtok' => 0.20, 'output_usd_per_mtok' => 0.20, 'context_window' => 8_192, 'last_verified_at' => '2026-05-04'], + 'qwen-qwq-32b' => ['tier' => 'heavy', 'input_usd_per_mtok' => 0.29, 'output_usd_per_mtok' => 0.39, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'mixtral-8x7b-32768' => ['tier' => 'default', 'input_usd_per_mtok' => 0.24, 'output_usd_per_mtok' => 0.24, 
'context_window' => 32_768, 'last_verified_at' => '2026-05-04'], ], - // OpenRouter free models — zero cost (rate-limited by OpenRouter). - 'openrouter' => [], - 'mistral' => [ - 'mistral-large-latest' => ['input' => 20, 'output' => 60], // $2.00/$6.00 per 1M - 'mistral-small-latest' => ['input' => 1, 'output' => 3], // $0.10/$0.30 per 1M - 'codestral-latest' => ['input' => 2, 'output' => 6], // $0.20/$0.60 per 1M - 'mistral-nemo-latest' => ['input' => 2, 'output' => 2], // $0.15/$0.15 per 1M + 'mistral-large-latest' => ['tier' => 'default', 'input_usd_per_mtok' => 2.00, 'output_usd_per_mtok' => 6.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'mistral-small-latest' => ['tier' => 'nano', 'input_usd_per_mtok' => 0.10, 'output_usd_per_mtok' => 0.30, 'context_window' => 32_000, 'last_verified_at' => '2026-05-04'], + 'codestral-latest' => ['tier' => 'default', 'input_usd_per_mtok' => 0.20, 'output_usd_per_mtok' => 0.60, 'context_window' => 32_000, 'last_verified_at' => '2026-05-04'], + 'mistral-nemo-latest' => ['tier' => 'nano', 'input_usd_per_mtok' => 0.15, 'output_usd_per_mtok' => 0.15, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], ], 'deepseek' => [ - 'deepseek-chat' => ['input' => 3, 'output' => 11], // $0.27/$1.10 per 1M - 'deepseek-reasoner' => ['input' => 6, 'output' => 22], // $0.55/$2.19 per 1M + 'deepseek-chat' => ['tier' => 'default', 'input_usd_per_mtok' => 0.27, 'output_usd_per_mtok' => 1.10, 'context_window' => 64_000, 'last_verified_at' => '2026-05-04'], + 'deepseek-reasoner' => ['tier' => 'heavy', 'input_usd_per_mtok' => 0.55, 'output_usd_per_mtok' => 2.19, 'context_window' => 64_000, 'last_verified_at' => '2026-05-04'], ], 'xai' => [ - 'grok-3' => ['input' => 30, 'output' => 150], // $3.00/$15.00 per 1M - 'grok-3-mini' => ['input' => 3, 'output' => 5], // $0.30/$0.50 per 1M - 'grok-2-latest' => ['input' => 20, 'output' => 100], // $2.00/$10.00 per 1M + 'grok-3' => ['tier' => 'default', 'input_usd_per_mtok' 
=> 3.00, 'output_usd_per_mtok' => 15.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'grok-3-mini' => ['tier' => 'nano', 'input_usd_per_mtok' => 0.30, 'output_usd_per_mtok' => 0.50, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'grok-2-latest' => ['tier' => 'default', 'input_usd_per_mtok' => 2.00, 'output_usd_per_mtok' => 10.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], ], 'perplexity' => [ - 'sonar-pro' => ['input' => 30, 'output' => 150], // $3.00/$15.00 per 1M - 'sonar' => ['input' => 10, 'output' => 10], // $1.00/$1.00 per 1M - 'sonar-reasoning' => ['input' => 10, 'output' => 50], // $1.00/$5.00 per 1M + 'sonar-pro' => ['tier' => 'default', 'input_usd_per_mtok' => 3.00, 'output_usd_per_mtok' => 15.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'sonar' => ['tier' => 'nano', 'input_usd_per_mtok' => 1.00, 'output_usd_per_mtok' => 1.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], + 'sonar-reasoning' => ['tier' => 'heavy', 'input_usd_per_mtok' => 1.00, 'output_usd_per_mtok' => 5.00, 'context_window' => 128_000, 'last_verified_at' => '2026-05-04'], ], 'fireworks' => [ - 'accounts/fireworks/models/llama-v3p3-70b-instruct' => ['input' => 9, 'output' => 9], // $0.90/$0.90 per 1M - 'accounts/fireworks/models/deepseek-r1' => ['input' => 30, 'output' => 80], // $3.00/$8.00 per 1M - 'accounts/fireworks/models/qwen3-235b-a22b' => ['input' => 2, 'output' => 9], // $0.22/$0.88 per 1M - 'accounts/fireworks/models/mixtral-8x22b-instruct' => ['input' => 12, 'output' => 12], // $1.20/$1.20 per 1M + 'accounts/fireworks/models/llama-v3p3-70b-instruct' => ['tier' => 'default', 'input_usd_per_mtok' => 0.90, 'output_usd_per_mtok' => 0.90, 'last_verified_at' => '2026-05-04'], + 'accounts/fireworks/models/deepseek-r1' => ['tier' => 'heavy', 'input_usd_per_mtok' => 3.00, 'output_usd_per_mtok' => 8.00, 'last_verified_at' => '2026-05-04'], + 
'accounts/fireworks/models/qwen3-235b-a22b' => ['tier' => 'default', 'input_usd_per_mtok' => 0.22, 'output_usd_per_mtok' => 0.88, 'last_verified_at' => '2026-05-04'], + 'accounts/fireworks/models/mixtral-8x22b-instruct' => ['tier' => 'default', 'input_usd_per_mtok' => 1.20, 'output_usd_per_mtok' => 1.20, 'last_verified_at' => '2026-05-04'], ], - // Local HTTP LLM providers — zero cost (runs on your hardware). - // CostCalculator returns 0 for unknown models, so no per-model entries needed. - // Add specific model entries here if you want explicit cost tracking. + // Passthrough / zero-cost providers — pricing tracked elsewhere or runs locally. + 'portkey' => [ + '*' => ['tier' => 'default', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + ], + 'codex' => [ + 'gpt-5.3-codex' => ['tier' => 'default', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + 'gpt-5.2-codex' => ['tier' => 'default', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + 'gpt-5.1-codex-mini' => ['tier' => 'nano', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + ], + 'claude-code' => [ + 'claude-sonnet-4-5' => ['tier' => 'default', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + 'claude-opus-4-6' => ['tier' => 'heavy', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + 'claude-haiku-4-5' => ['tier' => 'nano', 'input_usd_per_mtok' => 0, 'output_usd_per_mtok' => 0], + ], + 'openrouter' => [], 'ollama' => [], 'openai_compatible' => [], 'litellm_proxy' => [], @@ -120,33 +269,59 @@ /* |-------------------------------------------------------------------------- - | Context Window Sizes (tokens) + | Non-LLM cost categories (scaffolded, P0 cost = 0) + |-------------------------------------------------------------------------- + | + | Threaded through but NOT deducted yet. Real prices land in P1+ once + | each subsystem reports usage events. Schema is stable — adding new keys + | here doesn't break anything. 
+ | + */ + 'compute' => [ + 'browser_session_minute' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + 'code_execution_minute' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + 'sandbox_gb_hour' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + ], + 'outbound' => [ + 'email_send' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + 'sms_send' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + ], + 'storage' => [ + 'artifact_gb_month' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + ], + 'tool' => [ + 'mcp_call' => ['usd_per_unit' => 0.00, 'last_verified_at' => '2026-05-04'], + ], + + /* + |-------------------------------------------------------------------------- + | Context Window Sizes (legacy lookup — back-compat for ContextHealthService) |-------------------------------------------------------------------------- - | Used by ContextHealthService to compute what fraction of a model's - | context window an experiment has consumed across all its LLM calls. - | Falls back to 200_000 for unknown models. + | + | Now sourced from per-model `context_window` field above. This map is kept for + | callers using the old `config('llm_pricing.context_windows.{model}')` path. + | Will be removed once all callers migrate to per-provider lookup.
+ | + */ 'context_windows' => [ - // Anthropic 'claude-sonnet-4-5-20250929' => 200_000, 'claude-haiku-4-5-20251001' => 200_000, 'claude-opus-4-6' => 200_000, - // OpenAI + 'claude-opus-4-7' => 200_000, 'gpt-4o' => 128_000, 'gpt-4o-mini' => 128_000, - // Google + 'gpt-5' => 400_000, + 'gpt-5-nano' => 128_000, 'gemini-2.5-flash' => 1_048_576, 'gemini-2.5-pro' => 1_048_576, - // Groq (various Llama models) 'llama-3.3-70b-versatile' => 128_000, 'llama-3.1-8b-instant' => 128_000, - // Mistral 'mistral-large-latest' => 128_000, 'mistral-small-latest' => 32_000, ], - // Default estimation multiplier for budget reservation - // Reserve 1.5x the estimated cost to account for retries + // Default reservation multiplier for budget reservation (back-compat). + // CostCalculator::estimateCost() falls back to this when no tier-specific multiplier is found. 'reservation_multiplier' => 1.5, // Maximum tokens per request (safety limit) diff --git a/tests/Unit/Domain/Budget/CostCalculatorPlatformCreditsTest.php b/tests/Unit/Domain/Budget/CostCalculatorPlatformCreditsTest.php new file mode 100644 index 00000000..d45ecfea --- /dev/null +++ b/tests/Unit/Domain/Budget/CostCalculatorPlatformCreditsTest.php @@ -0,0 +1,208 @@ +calc = new CostCalculator; + // Defaults: usd_per_credit=0.01, margin=1.30, min=1 + config()->set('llm_pricing.usd_per_credit', 0.01); + config()->set('llm_pricing.margin_multiplier', 1.30); + config()->set('llm_pricing.min_credits_per_call', 1); + config()->set('llm_pricing.max_credits_per_call', null); + } + + public function test_nano_call_floors_at_min_credits(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'openai', + model: 'gpt-5-nano', + inputTokens: 3000, + outputTokens: 800, + ); + + // raw = 3000/1e6 * 0.05 + 800/1e6 * 0.40 = 0.00015 + 0.00032 = 0.00047 + // billable = 0.00047 * 1.30 = 0.000611 + // ceil(0.000611 / 0.01) = 1, which already meets the min floor of 1 + $this->assertSame(1, $result['platform_credits']); + $this->assertEqualsWithDelta(0.00047,
$result['raw_cost_usd'], 0.0001); + $this->assertEqualsWithDelta(0.000611, $result['billable_cost_usd'], 0.0001); + $this->assertSame(1.30, $result['margin_applied']); + } + + public function test_haiku_nano_call_floors_at_min(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-haiku-4-5', + inputTokens: 3000, + outputTokens: 800, + ); + + $this->assertSame(1, $result['platform_credits']); + $this->assertGreaterThan(0, $result['raw_cost_usd']); + } + + public function test_sonnet_medium_call(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-sonnet-4-6', + inputTokens: 5000, + outputTokens: 2000, + ); + + // raw = 5000/1e6 * 3 + 2000/1e6 * 15 = 0.015 + 0.030 = 0.045 + // billable = 0.045 * 1.30 = 0.0585 + // ceil(0.0585 / 0.01) = 6 + $this->assertSame(6, $result['platform_credits']); + $this->assertEqualsWithDelta(0.045, $result['raw_cost_usd'], 0.001); + } + + public function test_opus_heavy_does_not_lose_money(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-opus-4-7', + inputTokens: 10_000, + outputTokens: 5000, + ); + + // raw = 10K/1e6 * 5 + 5K/1e6 * 25 = 0.05 + 0.125 = 0.175 + // billable = 0.175 * 1.30 = 0.2275 + // ceil(0.2275 / 0.01) = 23 + $this->assertGreaterThanOrEqual(23, $result['platform_credits']); + // Customer pays 0.23+ USD; cost was 0.175 USD → ~30% margin. Not a loss.
+ $charged = $result['platform_credits'] * 0.01; + $this->assertGreaterThan($result['raw_cost_usd'], $charged, 'Charged amount must exceed raw cost.'); + } + + public function test_margin_override_takes_precedence(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-opus-4-7', + inputTokens: 10_000, + outputTokens: 5000, + marginOverride: 1.50, + ); + + $this->assertSame(1.50, $result['margin_applied']); + // Higher margin → more credits + $resultDefault = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-opus-4-7', + inputTokens: 10_000, + outputTokens: 5000, + ); + $this->assertGreaterThan($resultDefault['platform_credits'], $result['platform_credits']); + } + + public function test_max_cap_clamps_runaway_call(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-opus-4-7', + inputTokens: 100_000, + outputTokens: 50_000, + maxCapOverride: 10, + ); + + $this->assertSame(10, $result['platform_credits']); + } + + public function test_min_floor_wins_over_max_cap(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'openai', + model: 'gpt-5-nano', + inputTokens: 100, + outputTokens: 50, + maxCapOverride: 0, // cap below the 1-credit floor, so the floor must win + ); + + $this->assertSame(1, $result['platform_credits']); + } + + public function test_unknown_model_returns_zero_gracefully(): void + { + $result = $this->calc->calculatePlatformCredits( + provider: 'made-up-provider', + model: 'made-up-model', + inputTokens: 1000, + outputTokens: 500, + ); + + $this->assertSame(0, $result['platform_credits']); + $this->assertSame(0.0, $result['raw_cost_usd']); + } + + public function test_cached_input_uses_cache_read_rate(): void + { + $with = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-sonnet-4-6', + inputTokens: 5000, + outputTokens: 2000, + cachedInputTokens: 4000, + ); + $without = $this->calc->calculatePlatformCredits( + provider:
'anthropic', + model: 'claude-sonnet-4-6', + inputTokens: 5000, + outputTokens: 2000, + cachedInputTokens: 0, + ); + + // 4K cached at 0.30/Mtok vs 4K uncached at 3.00/Mtok → way cheaper + $this->assertLessThan($without['raw_cost_usd'], $with['raw_cost_usd']); + $this->assertLessThanOrEqual($without['platform_credits'], $with['platform_credits']); + } + + public function test_cache_write_5m_adds_surcharge(): void + { + $with = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-sonnet-4-6', + inputTokens: 5000, + outputTokens: 2000, + cacheStrategy: CostCalculator::CACHE_STRATEGY_5M, + ); + $without = $this->calc->calculatePlatformCredits( + provider: 'anthropic', + model: 'claude-sonnet-4-6', + inputTokens: 5000, + outputTokens: 2000, + ); + + $this->assertGreaterThan($without['raw_cost_usd'], $with['raw_cost_usd']); + } + + public function test_estimate_uses_tier_reservation_multiplier(): void + { + config()->set('llm_pricing.reservation_multipliers.heavy', 1.0); + $unbuffered = $this->calc->estimatePlatformCredits('anthropic', 'claude-opus-4-7', 50_000, 10_000); + + config()->set('llm_pricing.reservation_multipliers.heavy', 2.0); + $buffered = $this->calc->estimatePlatformCredits('anthropic', 'claude-opus-4-7', 50_000, 10_000); + + // Same model and token counts; only the heavy-tier multiplier differs, + // so a larger estimate can only come from the tier multiplier being applied. + $this->assertGreaterThan($unbuffered, $buffered); + } + + public function test_back_compat_calculate_cost_still_returns_cost_credits(): void + { + $cost = $this->calc->calculateCost('anthropic', 'claude-sonnet-4-6', 5000, 2000); + // raw = 0.045 USD; cost_credits = 0.045 / 0.001 = 45 + $this->assertSame(45, $cost); + } +}