diff --git a/scripts/query-usage-stats.ts b/scripts/query-usage-stats.ts
new file mode 100644
index 0000000000..371701902d
--- /dev/null
+++ b/scripts/query-usage-stats.ts
@@ -0,0 +1,124 @@
+import { db } from '@codebuff/internal/db'
+import { sql } from 'drizzle-orm'
+
+/** Reporting window in days; drives both the SQL filter and the printed labels. */
+const LOOKBACK_DAYS = 4
+
+/** Models included in the report; drives both the SQL filter and the banner. */
+const MODELS = ['minimax/minimax-m2.5']
+
+/**
+ * Prints aggregate usage statistics for rows of the `message` table whose
+ * `finished_at` is within the last LOOKBACK_DAYS days and whose `model` is one
+ * of MODELS.
+ *
+ * Reports average total input tokens, average cache-read share of input
+ * tokens, average output tokens, median and peak requests per second, average
+ * requests per client, and the total request count.
+ */
+async function queryUsageStats() {
+  console.log(
+    `Querying usage stats for the last ${LOOKBACK_DAYS} days (${MODELS.join(', ')})...\n`,
+  )
+
+  // Bind every model as its own parameter: model IN ($1, $2, ...).
+  const modelList = sql.join(
+    MODELS.map((model) => sql`${model}`),
+    sql`, `,
+  )
+
+  // Note: the `rps` CTE has one row per second that saw at least one finished
+  // request, so median_rps is taken over active seconds only.
+  const result = await db.execute(sql`
+    WITH recent AS (
+      SELECT
+        COALESCE(input_tokens, 0) AS input_tokens,
+        COALESCE(cache_read_input_tokens, 0) AS cache_read_input_tokens,
+        COALESCE(cache_creation_input_tokens, 0) AS cache_creation_input_tokens,
+        output_tokens,
+        finished_at,
+        client_id
+      FROM message
+      WHERE finished_at >= NOW() - (${LOOKBACK_DAYS}::int * INTERVAL '1 day')
+        AND model IN (${modelList})
+    ),
+
+    token_stats AS (
+      SELECT
+        ROUND(AVG(input_tokens + cache_read_input_tokens + cache_creation_input_tokens))
+          AS avg_total_input_tokens,
+        ROUND(
+          AVG(
+            CASE
+              WHEN (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) > 0
+                THEN cache_read_input_tokens::numeric
+                  / (input_tokens + cache_read_input_tokens + cache_creation_input_tokens)
+              ELSE 0
+            END
+          ) * 100, 1
+        ) AS avg_cache_rate_pct,
+        ROUND(AVG(output_tokens))
+          AS avg_output_tokens,
+        COUNT(*) AS total_requests
+      FROM recent
+    ),
+
+    client_stats AS (
+      SELECT
+        ROUND(AVG(cnt)) AS avg_requests_per_client
+      FROM (
+        SELECT client_id, COUNT(*) AS cnt
+        FROM recent
+        WHERE client_id IS NOT NULL
+        GROUP BY client_id
+      ) per_client
+    ),
+
+    rps AS (
+      SELECT
+        COUNT(*) AS req_count
+      FROM recent
+      GROUP BY date_trunc('second', finished_at)
+    ),
+
+    rps_stats AS (
+      SELECT
+        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY req_count) AS median_rps,
+        MAX(req_count) AS peak_rps
+      FROM rps
+    )
+
+    SELECT
+      t.avg_total_input_tokens,
+      t.avg_cache_rate_pct,
+      t.avg_output_tokens,
+      c.avg_requests_per_client,
+      r.median_rps,
+      r.peak_rps,
+      t.total_requests
+    FROM token_stats t, rps_stats r, client_stats c
+  `)
+
+  // Every CTE joined above is an ungrouped aggregate, so the query yields one
+  // row even when nothing matched; the request count is the real "no data" signal.
+  const row = result[0]
+  if (!row || Number(row.total_requests) === 0) {
+    console.log('No data found for the given filters.')
+    return
+  }
+
+  console.log('Results:')
+  console.log('─────────────────────────────────────────')
+  console.log(`Avg total input tokens: ${row.avg_total_input_tokens}`)
+  console.log(`Avg cache rate: ${row.avg_cache_rate_pct}%`)
+  console.log(`Avg output tokens: ${row.avg_output_tokens}`)
+  console.log(`Median RPS: ${row.median_rps}`)
+  console.log(`Peak RPS: ${row.peak_rps}`)
+  console.log(`Avg requests/client: ${row.avg_requests_per_client}`)
+  console.log(`Total requests (${LOOKBACK_DAYS}d): ${row.total_requests}`)
+}
+
+queryUsageStats().then(() => process.exit(0)).catch((err) => {
+  console.error(err)
+  process.exit(1)
+})
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index bfd7afb407..df8f356d17 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -155,6 +155,7 @@ describe('Fireworks deployment routing', () => { originalModel: 'minimax/minimax-m2.5', fetch: mockFetch, logger, + sessionId: 'test-user-id', }) expect(response.status).toBe(200) @@ -182,6 +183,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) expect(response.status).toBe(200) @@ -225,6 +227,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) expect(response.status).toBe(200) @@ -262,6 +265,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }), ).rejects.toBeInstanceOf(FireworksError) } finally { @@ -287,6 +291,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) expect(response.status).toBe(200) @@ -314,6 +319,7 @@ describe('Fireworks deployment routing', () => { 
fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) expect(response.status).toBe(200) @@ -345,6 +351,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) // Non-503 errors from deployment are returned as-is (caller handles them) @@ -384,6 +391,7 @@ describe('Fireworks deployment routing', () => { fetch: mockFetch, logger, useCustomDeployment: true, + sessionId: 'test-user-id', }) expect(logger.info).toHaveBeenCalledTimes(2) diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 6f890a0a34..2b28937415 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -92,8 +92,9 @@ function createFireworksRequest(params: { originalModel: string fetch: typeof globalThis.fetch modelIdOverride?: string + sessionId: string }) { - const { body, originalModel, fetch, modelIdOverride } = params + const { body, originalModel, fetch, modelIdOverride, sessionId } = params const fireworksBody: Record = { ...body, model: modelIdOverride ?? 
getFireworksModelId(originalModel), @@ -115,6 +116,7 @@ function createFireworksRequest(params: { headers: { Authorization: `Bearer ${env.FIREWORKS_API_KEY}`, 'Content-Type': 'application/json', + 'x-session-affinity': sessionId }, body: JSON.stringify(fireworksBody), // @ts-expect-error - dispatcher is a valid undici option not in fetch types @@ -168,7 +170,7 @@ export async function handleFireworksNonStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger }) + const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger, sessionId: userId }) if (!response.ok) { throw await parseFireworksError(response) @@ -244,7 +246,7 @@ export async function handleFireworksStream({ const startTime = new Date() const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger }) - const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger }) + const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger, sessionId: userId }) if (!response.ok) { throw await parseFireworksError(response) @@ -657,8 +659,9 @@ export async function createFireworksRequestWithFallback(params: { fetch: typeof globalThis.fetch logger: Logger useCustomDeployment?: boolean + sessionId: string }): Promise { - const { body, originalModel, fetch, logger } = params + const { body, originalModel, fetch, logger, sessionId } = params const useCustomDeployment = params.useCustomDeployment ?? 
FIREWORKS_USE_CUSTOM_DEPLOYMENT const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel] const shouldTryDeployment = @@ -677,6 +680,7 @@ export async function createFireworksRequestWithFallback(params: { originalModel, fetch, modelIdOverride: deploymentModelId, + sessionId, }) if (response.status === 503) { @@ -697,7 +701,7 @@ export async function createFireworksRequestWithFallback(params: { } } - return createFireworksRequest({ body, originalModel, fetch }) + return createFireworksRequest({ body, originalModel, fetch, sessionId }) } function creditsToFakeCost(credits: number): number {