Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions scripts/query-usage-stats.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { db } from '@codebuff/internal/db'
import { sql } from 'drizzle-orm'

/**
 * Prints aggregate usage statistics for recent `message` rows.
 *
 * The query considers rows whose `finished_at` falls within the last 4 days
 * and whose `model` is `minimax/minimax-m2.5`, then reports:
 * - average total input tokens (input + cache-read + cache-creation),
 * - average per-request cache rate (cache-read share of total input), as a %,
 * - average output tokens,
 * - median and peak requests per second,
 * - average requests per non-null `client_id`,
 * - total request count.
 *
 * Results are written to stdout; nothing is returned.
 */
async function queryUsageStats(): Promise<void> {
  // Keep these labels in sync with the WHERE clause of the `recent` CTE below.
  console.log(
    'Querying usage stats for the last 4 days (minimax-m2.5)...\n',
  )

  const result = await db.execute(sql`
    WITH recent AS (
      SELECT
        input_tokens,
        cache_read_input_tokens,
        COALESCE(cache_creation_input_tokens, 0) AS cache_creation_input_tokens,
        output_tokens,
        finished_at,
        client_id
      FROM message
      WHERE finished_at >= NOW() - INTERVAL '4 days'
        AND model IN ('minimax/minimax-m2.5')
    ),

    token_stats AS (
      SELECT
        ROUND(AVG(input_tokens + cache_read_input_tokens + cache_creation_input_tokens))
          AS avg_total_input_tokens,
        ROUND(
          AVG(
            CASE
              WHEN (input_tokens + cache_read_input_tokens + cache_creation_input_tokens) > 0
                THEN cache_read_input_tokens::numeric
                  / (input_tokens + cache_read_input_tokens + cache_creation_input_tokens)
              ELSE 0
            END
          ) * 100, 1
        ) AS avg_cache_rate_pct,
        ROUND(AVG(output_tokens))
          AS avg_output_tokens,
        COUNT(*) AS total_requests
      FROM recent
    ),

    client_stats AS (
      SELECT
        ROUND(AVG(cnt)) AS avg_requests_per_client
      FROM (
        SELECT client_id, COUNT(*) AS cnt
        FROM recent
        WHERE client_id IS NOT NULL
        GROUP BY client_id
      ) per_client
    ),

    rps AS (
      SELECT
        COUNT(*) AS req_count
      FROM recent
      GROUP BY date_trunc('second', finished_at)
    ),

    rps_stats AS (
      SELECT
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY req_count) AS median_rps,
        MAX(req_count) AS peak_rps
      FROM rps
    )

    SELECT
      t.avg_total_input_tokens,
      t.avg_cache_rate_pct,
      t.avg_output_tokens,
      c.avg_requests_per_client,
      r.median_rps,
      r.peak_rps,
      t.total_requests
    FROM token_stats t, rps_stats r, client_stats c
  `)

  // Each of the three joined CTEs is an ungrouped aggregate, so the final
  // SELECT yields one row even when `recent` is empty (COUNT = 0, the other
  // columns NULL). Treat a zero request count as "no data" rather than
  // printing a row of nulls.
  const row = result[0]
  if (!row || Number(row.total_requests) === 0) {
    console.log('No data found for the given filters.')
    return
  }

  // Note: the `rps` CTE only has buckets for seconds in which at least one
  // request finished, so the median is taken over active seconds only.
  console.log('Results:')
  console.log('─────────────────────────────────────────')
  console.log(`Avg total input tokens: ${row.avg_total_input_tokens}`)
  console.log(`Avg cache rate: ${row.avg_cache_rate_pct}%`)
  console.log(`Avg output tokens: ${row.avg_output_tokens}`)
  console.log(`Median RPS: ${row.median_rps}`)
  console.log(`Peak RPS: ${row.peak_rps}`)
  console.log(`Avg requests/client: ${row.avg_requests_per_client}`)
  console.log(`Total requests (4d): ${row.total_requests}`)
}

// Script entry point: run the report, exit 0 on success; on failure log the
// error and exit with status 1.
queryUsageStats()
  .then(() => {
    process.exit(0)
  })
  .catch((error) => {
    console.error(error)
    process.exit(1)
  })
8 changes: 8 additions & 0 deletions web/src/llm-api/__tests__/fireworks-deployment.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ describe('Fireworks deployment routing', () => {
originalModel: 'minimax/minimax-m2.5',
fetch: mockFetch,
logger,
sessionId: 'test-user-id',
})

expect(response.status).toBe(200)
Expand Down Expand Up @@ -182,6 +183,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

expect(response.status).toBe(200)
Expand Down Expand Up @@ -225,6 +227,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

expect(response.status).toBe(200)
Expand Down Expand Up @@ -262,6 +265,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
}),
).rejects.toBeInstanceOf(FireworksError)
} finally {
Expand All @@ -287,6 +291,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

expect(response.status).toBe(200)
Expand Down Expand Up @@ -314,6 +319,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

expect(response.status).toBe(200)
Expand Down Expand Up @@ -345,6 +351,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

// Non-503 errors from deployment are returned as-is (caller handles them)
Expand Down Expand Up @@ -384,6 +391,7 @@ describe('Fireworks deployment routing', () => {
fetch: mockFetch,
logger,
useCustomDeployment: true,
sessionId: 'test-user-id',
})

expect(logger.info).toHaveBeenCalledTimes(2)
Expand Down
14 changes: 9 additions & 5 deletions web/src/llm-api/fireworks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ function createFireworksRequest(params: {
originalModel: string
fetch: typeof globalThis.fetch
modelIdOverride?: string
sessionId: string
}) {
const { body, originalModel, fetch, modelIdOverride } = params
const { body, originalModel, fetch, modelIdOverride, sessionId } = params
const fireworksBody: Record<string, unknown> = {
...body,
model: modelIdOverride ?? getFireworksModelId(originalModel),
Expand All @@ -115,6 +116,7 @@ function createFireworksRequest(params: {
headers: {
Authorization: `Bearer ${env.FIREWORKS_API_KEY}`,
'Content-Type': 'application/json',
'x-session-affinity': sessionId
},
body: JSON.stringify(fireworksBody),
// @ts-expect-error - dispatcher is a valid undici option not in fetch types
Expand Down Expand Up @@ -168,7 +170,7 @@ export async function handleFireworksNonStream({
const startTime = new Date()
const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })

const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger })
const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger, sessionId: userId })

if (!response.ok) {
throw await parseFireworksError(response)
Expand Down Expand Up @@ -244,7 +246,7 @@ export async function handleFireworksStream({
const startTime = new Date()
const { clientId, clientRequestId, costMode } = extractRequestMetadata({ body, logger })

const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger })
const response = await createFireworksRequestWithFallback({ body, originalModel, fetch, logger, sessionId: userId })

if (!response.ok) {
throw await parseFireworksError(response)
Expand Down Expand Up @@ -657,8 +659,9 @@ export async function createFireworksRequestWithFallback(params: {
fetch: typeof globalThis.fetch
logger: Logger
useCustomDeployment?: boolean
sessionId: string
}): Promise<Response> {
const { body, originalModel, fetch, logger } = params
const { body, originalModel, fetch, logger, sessionId } = params
const useCustomDeployment = params.useCustomDeployment ?? FIREWORKS_USE_CUSTOM_DEPLOYMENT
const deploymentModelId = FIREWORKS_DEPLOYMENT_MAP[originalModel]
const shouldTryDeployment =
Expand All @@ -677,6 +680,7 @@ export async function createFireworksRequestWithFallback(params: {
originalModel,
fetch,
modelIdOverride: deploymentModelId,
sessionId,
})

if (response.status === 503) {
Expand All @@ -697,7 +701,7 @@ export async function createFireworksRequestWithFallback(params: {
}
}

return createFireworksRequest({ body, originalModel, fetch })
return createFireworksRequest({ body, originalModel, fetch, sessionId })
}

function creditsToFakeCost(credits: number): number {
Expand Down
Loading