Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,18 @@ ruff check .
- **abi_extractor.py**: Regex-based ABI extraction from Stylus Rust code (no Docker needed)
- **compiler_verifier.py**: Docker-based `cargo check` with structured error parsing and LLM fix loop

### Rate Limits (`apps/web/src/lib/rateLimit.ts`)
- Two-window enforcement (per-minute burst + per-day total), per-key (or per-user for session auth), per-category (chat and tool counters are independent). Whichever window is exhausted first triggers 429.
- KV keys: `rl:{subject}:{category}:m:{YYYY-MM-DDTHH:MM}` (TTL 120s) and `rl:{subject}:{category}:d:{YYYY-MM-DD}` (TTL 48h).
- Tiers (code-defined; only the name lives in `api_keys.rate_limit_tier`):
- `free` (default): 100/min, 1000/day
- `pro`: 500/min, 10000/day
- `unlimited`: 10K/min, 1M/day (effectively uncapped)
- Enforcement points: `/api/v1/chat/completions`, every `/api/v1/tools/*` route, and `tools/call` on `/mcp`. Admin requests (`AUTH_SECRET` Bearer) bypass.
- Headers on every response: bottleneck `X-RateLimit-Limit/-Remaining/-Reset`, plus per-window `-Minute` and `-Day` variants, plus `X-RateLimit-Tier`. 429 also carries `Retry-After` for the denying window.
- `GET /api/v1/usage` returns current counter state without incrementing — for client-side planning.
- Tier management: `GET/PATCH /api/admin/rate-limits` (admin secret), surfaced in `/dashboard/admin` under the "Rate Limits" tab.

### Worker-Native Ingestion Pipeline (`apps/web/src/lib/`)
- **scraper.ts**: Web documentation scraping via HTMLRewriter + regex HTML-to-markdown
- **github.ts**: GitHub repo scraping via Trees API + Contents API (no tarball)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ https://arbuilder.app/mcp

- Requires `arb_` API key from dashboard
- Usage tracked per API key
- Rate limited per free tier (100 calls/day)
- Rate limited per tier with two windows (burst per minute + total per day). Free tier: 100 req/min and 1000 req/day, applied separately to chat and tool calls. Admin can promote keys to `pro` (500/min, 10K/day) or `unlimited` from the admin dashboard. Every response carries `X-RateLimit-*` headers.

### Chat Completions API (OpenAI-compatible)

Expand Down
5 changes: 5 additions & 0 deletions apps/web/migrations/0005_rate_limits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Add rate_limit_tier to api_keys for tier-based rate limiting
-- (per-minute and per-day windows; not only daily quotas).
-- Tiers: 'free' (default), 'pro', 'unlimited'.
-- Limits live in code (apps/web/src/lib/rateLimit.ts) so they can be tuned
-- without a migration; this column only carries the tier name.
-- NOT NULL + DEFAULT 'free' means rows that already exist are assigned 'free'.
ALTER TABLE api_keys ADD COLUMN rate_limit_tier TEXT NOT NULL DEFAULT 'free';
128 changes: 128 additions & 0 deletions apps/web/src/app/api/admin/rate-limits/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
* Admin API for managing per-key rate-limit tiers.
*
* GET /api/admin/rate-limits — list all api_keys with tier + recent usage
* PATCH /api/admin/rate-limits — body: { keyId, tier } — update tier on a key
*
* Headers: X-Admin-Secret: <AUTH_SECRET>
*
* Tiers are validated against the VALID_TIERS list below; the limits reported
* for each tier come from getLimitsForTier in @/lib/rateLimit.
*/

import { NextRequest, NextResponse } from "next/server";
import { getCloudflareContext } from "@opennextjs/cloudflare";
import { getLimitsForTier, type RateLimitTier } from "@/lib/rateLimit";

/**
 * Tier names accepted by PATCH and returned to clients by GET.
 * NOTE(review): this list is hard-coded here; it presumably has to mirror the
 * tiers known to @/lib/rateLimit — confirm when adding a tier.
 */
const VALID_TIERS: Array<RateLimitTier> = ["free", "pro", "unlimited"];

/**
 * Checks the `X-Admin-Secret` request header against the configured admin secret.
 *
 * Compared with a plain `===` this is stricter in two ways:
 * - an empty or missing `authSecret` never authenticates, so a misconfigured
 *   (blank) secret cannot be matched by sending an empty header;
 * - every character is inspected and differences are accumulated, so the loop
 *   does not stop at the first mismatching character.
 *
 * @param request    Incoming request whose headers are inspected.
 * @param authSecret Expected secret value (the caller passes `env.AUTH_SECRET`).
 * @returns `true` only when the header is present and equal to a non-empty secret.
 */
function verifyAuth(request: NextRequest, authSecret: string): boolean {
  const provided = request.headers.get("X-Admin-Secret");
  if (!authSecret || provided === null) return false;

  // Seed with the length difference so strings of unequal length can never pass.
  let diff = provided.length ^ authSecret.length;
  const span = Math.max(provided.length, authSecret.length);
  for (let i = 0; i < span; i++) {
    // charCodeAt() past the end yields NaN, which the bitwise operators treat as 0.
    diff |= provided.charCodeAt(i) ^ authSecret.charCodeAt(i);
  }
  return diff === 0;
}

/**
 * One row of the admin listing query: an `api_keys` record joined with the
 * owner's email and a count of `usage_logs` rows from the last 24 hours.
 */
interface KeyRow {
  // --- key identity ---
  id: string;
  key_prefix: string;
  name: string | null;

  // --- owner ---
  user_id: string;
  /** Comes from a LEFT JOIN on `users`, so it is null when no email is available. */
  user_email: string | null;

  // --- rate limiting ---
  /** Tier name stored on the key; translated to limits via getLimitsForTier. */
  rate_limit_tier: string;
  /** `COALESCE(cnt, 0)`: zero when the key has no usage rows in the window. */
  calls_24h: number;

  // --- lifecycle timestamps ---
  created_at: string;
  last_used_at: string | null;
  revoked_at: string | null;
}

/**
 * GET /api/admin/rate-limits
 *
 * Lists up to 500 API keys (newest first) with their tier, the limits that
 * tier maps to, and the number of usage_logs rows recorded in the last 24h.
 *
 * Responses:
 * - 401 `{ error: "Unauthorized" }` when the admin secret check fails
 * - 200 `{ tiers, keys }` on success
 * - 500 `{ error }` when anything throws
 */
export async function GET(request: NextRequest) {
  try {
    const { env } = getCloudflareContext();
    const authorized = verifyAuth(request, env.AUTH_SECRET);
    if (!authorized) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    // Lower bound of the rolling 24-hour window used for the usage count.
    const windowStart = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();

    const listing = await env.DB
      .prepare(
        `SELECT k.id, k.user_id, u.email AS user_email, k.key_prefix, k.name,
k.rate_limit_tier, k.created_at, k.last_used_at, k.revoked_at,
COALESCE(c.cnt, 0) AS calls_24h
FROM api_keys k
LEFT JOIN users u ON u.id = k.user_id
LEFT JOIN (
SELECT api_key_id, COUNT(*) AS cnt
FROM usage_logs
WHERE created_at >= ?
GROUP BY api_key_id
) c ON c.api_key_id = k.id
ORDER BY k.created_at DESC
LIMIT 500`,
      )
      .bind(windowStart)
      .all<KeyRow>();

    // Translate snake_case DB rows into the camelCase shape the dashboard consumes.
    const rows = listing.results ?? [];
    const keys = rows.map((row) => {
      const tierLimits = getLimitsForTier(row.rate_limit_tier);
      return {
        id: row.id,
        userId: row.user_id,
        userEmail: row.user_email,
        keyPrefix: row.key_prefix,
        name: row.name,
        tier: row.rate_limit_tier,
        limits: { perMinute: tierLimits.perMinute, perDay: tierLimits.perDay },
        createdAt: row.created_at,
        lastUsedAt: row.last_used_at,
        revokedAt: row.revoked_at,
        calls24h: row.calls_24h,
      };
    });

    return NextResponse.json({ tiers: VALID_TIERS, keys });
  } catch (e) {
    console.error("admin/rate-limits GET failed:", e);
    const message = (e as Error).message || String(e);
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

/**
 * PATCH /api/admin/rate-limits
 *
 * Body: `{ keyId: string, tier: "free" | "pro" | "unlimited" }`.
 * Sets `api_keys.rate_limit_tier` for a single key.
 *
 * Responses:
 * - 401 when the admin secret check fails
 * - 400 when `keyId`/`tier` are missing, `keyId` is not a string, or `tier`
 *   is not one of VALID_TIERS
 * - 404 when no row was updated
 * - 200 `{ ok, keyId, tier, limits }` on success
 * - 500 `{ error }` when anything throws
 */
export async function PATCH(request: NextRequest) {
  try {
    const { env } = getCloudflareContext();
    if (!verifyAuth(request, env.AUTH_SECRET)) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    // A syntactically valid JSON body can still be `null`, a number, a string
    // or an array. Only treat it as a field bag when it really is an object,
    // so such bodies produce a 400 below instead of a TypeError (-> 500).
    const raw: unknown = await request.json().catch(() => ({}));
    const payload: Record<string, unknown> =
      typeof raw === "object" && raw !== null ? (raw as Record<string, unknown>) : {};
    const { keyId, tier } = payload;

    if (!keyId || !tier) {
      return NextResponse.json({ error: "Missing keyId or tier" }, { status: 400 });
    }
    // Reject non-string ids up front rather than binding arbitrary JSON values
    // into the UPDATE statement.
    if (typeof keyId !== "string") {
      return NextResponse.json({ error: "keyId must be a string" }, { status: 400 });
    }
    // Runtime membership check instead of a type assertion.
    const matchedTier = typeof tier === "string" ? VALID_TIERS.find((t) => t === tier) : undefined;
    if (matchedTier === undefined) {
      return NextResponse.json(
        { error: `Invalid tier. Must be one of: ${VALID_TIERS.join(", ")}` },
        { status: 400 },
      );
    }

    const outcome = await env.DB
      .prepare(`UPDATE api_keys SET rate_limit_tier = ? WHERE id = ?`)
      .bind(matchedTier, keyId)
      .run();

    // Zero changed rows means no api_keys row has this id.
    if (outcome.meta.changes === 0) {
      return NextResponse.json({ error: "Key not found" }, { status: 404 });
    }

    const lim = getLimitsForTier(matchedTier);
    return NextResponse.json({
      ok: true,
      keyId,
      tier: matchedTier,
      limits: { perMinute: lim.perMinute, perDay: lim.perDay },
    });
  } catch (e) {
    console.error("admin/rate-limits PATCH failed:", e);
    return NextResponse.json({ error: (e as Error).message || String(e) }, { status: 500 });
  }
}
102 changes: 102 additions & 0 deletions apps/web/src/app/api/keys/usage/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* GET /api/keys/usage
*
* Session-auth only. Returns current rate-limit counters and 24h activity
* for every active key owned by the logged-in user. Used by /dashboard/keys
* to render per-key usage widgets without making the user paste their keys.
*
* Like /api/v1/usage, this does NOT increment any counter.
*/

import { NextResponse } from "next/server";
import { getCloudflareContext } from "@opennextjs/cloudflare";
import { auth } from "@/auth";
import { peekUsage, getLimitsForTier } from "@/lib/rateLimit";

/** Columns selected from `api_keys` for each active key of the session user. */
interface KeyRow {
  /** Key id; used to build the `key:{id}` rate-limit subject and to index the response. */
  id: string;
  last_used_at: string | null;
  /** Tier name stored on the key; passed to peekUsage and getLimitsForTier. */
  rate_limit_tier: string;
}

/** Per-key aggregate over `usage_logs` rows inside the 24-hour window. */
interface UsageRow {
  /** Key the aggregate belongs to (the GROUP BY column). */
  api_key_id: string;
  /** `COUNT(*)` of log rows in the window. */
  total: number;
  /** `SUM(success)`; typed nullable because SUM yields NULL when every input is NULL. */
  ok: number | null;
  /** `MAX(created_at)`: timestamp of the most recent logged call. */
  last: string | null;
}

/**
 * GET /api/keys/usage
 *
 * Session-authenticated. For every non-revoked key owned by the logged-in
 * user, returns the key's tier and limits, the chat and tool counters read
 * through peekUsage, and a 24-hour activity summary from `usage_logs`.
 *
 * Responses:
 * - 401 `{ error: "Unauthorized" }` without a session user id
 * - 200 `{ usage: { [keyId]: {...} } }` (empty object when the user has no active keys)
 * - 500 `{ error }` when anything throws
 */
export async function GET() {
  try {
    const session = await auth();
    const userId = session?.user?.id;
    if (!userId) {
      return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
    }

    const { env } = getCloudflareContext();

    const keysRes = await env.DB
      .prepare(
        `SELECT id, rate_limit_tier, last_used_at
FROM api_keys
WHERE user_id = ? AND revoked_at IS NULL`,
      )
      .bind(userId)
      .all<KeyRow>();

    const keys = keysRes.results ?? [];
    if (keys.length === 0) return NextResponse.json({ usage: {} });

    // Single 24h-window aggregate for all of the user's active keys.
    // The key set is selected by joining api_keys on the same predicate as the
    // query above instead of expanding one bound parameter per key, so the
    // statement always binds exactly two values no matter how many keys exist.
    const since = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
    const usageRes = await env.DB
      .prepare(
        `SELECT l.api_key_id AS api_key_id,
COUNT(*) AS total,
SUM(l.success) AS ok,
MAX(l.created_at) AS last
FROM usage_logs l
JOIN api_keys k ON k.id = l.api_key_id
WHERE k.user_id = ? AND k.revoked_at IS NULL AND l.created_at >= ?
GROUP BY l.api_key_id`,
      )
      .bind(userId, since)
      .all<UsageRow>();

    const recentByKey = new Map<string, UsageRow>();
    for (const row of usageRes.results ?? []) recentByKey.set(row.api_key_id, row);

    // Read both category counters for every key in parallel. Each key's pair
    // stays together, so no lookup (or non-null assertion) is needed afterwards.
    const perKey = await Promise.all(
      keys.map(async (k) => {
        const subject = `key:${k.id}`;
        const [chat, tool] = await Promise.all([
          peekUsage(env.KV, subject, "chat", k.rate_limit_tier),
          peekUsage(env.KV, subject, "tool", k.rate_limit_tier),
        ]);
        return { key: k, chat, tool };
      }),
    );

    const usage: Record<string, unknown> = {};
    for (const { key, chat, tool } of perKey) {
      const recent = recentByKey.get(key.id);
      const total = recent?.total ?? 0;
      usage[key.id] = {
        tier: key.rate_limit_tier,
        limits: getLimitsForTier(key.rate_limit_tier),
        chat: { minute: chat.minute, day: chat.day },
        tool: { minute: tool.minute, day: tool.day },
        recent: {
          calls24h: total,
          lastCallAt: recent?.last ?? null,
          // null (not 0) when there were no calls, so "no data" is distinguishable.
          successRate: total > 0 ? (recent?.ok ?? 0) / total : null,
        },
      };
    }

    return NextResponse.json({ usage });
  } catch (e) {
    console.error("/api/keys/usage failed:", e);
    return NextResponse.json({ error: (e as Error).message || String(e) }, { status: 500 });
  }
}
26 changes: 24 additions & 2 deletions apps/web/src/app/api/v1/chat/completions/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { getCloudflareContext } from "@opennextjs/cloudflare";
import { validateRequest } from "@/lib/auth/validateRequest";
import { runAgentNonStreaming, runAgentStreaming } from "@/lib/chat/agent";
import { encodeSSEChunk, encodeSSEDone, encodeSSEError } from "@/lib/chat/streaming";
import { enforceRateLimit, rateLimitHeaders, subjectFor } from "@/lib/rateLimit";
import type {
ChatCompletionRequest,
ChatCompletionResponse,
Expand All @@ -26,9 +27,10 @@ function errorResponse(
type: string,
status: number,
code?: string,
extraHeaders?: Record<string, string>,
): NextResponse {
const body: OpenAIErrorBody = { error: { message, type, code: code ?? null } };
return NextResponse.json(body, { status });
return NextResponse.json(body, { status, headers: extraHeaders });
}

async function logChatUsage(
Expand Down Expand Up @@ -76,6 +78,25 @@ export async function POST(request: NextRequest) {
);
}

// Rate limit — per-key for arb_ keys, per-user for session auth, bypass for admin.
const subj = subjectFor(auth);
let rlHeaders: Record<string, string> = {};
if (subj) {
const decision = await enforceRateLimit(env.KV, subj.subject, "chat", subj.tier);
rlHeaders = rateLimitHeaders(decision);
if (!decision.allowed) {
const denyWindow = decision.exceededWindow === "minute" ? decision.minute : decision.day;
const label = decision.exceededWindow === "minute" ? "per-minute" : "per-day";
return errorResponse(
`Chat rate limit exceeded (${label}: ${denyWindow.limit} on tier '${decision.tier}'). Try again in ${denyWindow.resetSeconds}s.`,
"rate_limit_exceeded",
429,
undefined,
rlHeaders,
);
}
}

// Parse body.
let body: ChatCompletionRequest;
try {
Expand Down Expand Up @@ -159,6 +180,7 @@ export async function POST(request: NextRequest) {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache, no-transform",
Connection: "keep-alive",
...rlHeaders,
},
});
}
Expand Down Expand Up @@ -187,7 +209,7 @@ export async function POST(request: NextRequest) {
env.DB, apiKeyId, result.toolCallNames, result.usage.total_tokens, Date.now() - start, true,
);
}
return NextResponse.json(response);
return NextResponse.json(response, { headers: rlHeaders });
} catch (e) {
const msg = (e as Error).message || String(e);
if (apiKeyId) {
Expand Down
5 changes: 4 additions & 1 deletion apps/web/src/app/api/v1/tools/ask-bridging/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ import { NextRequest, NextResponse } from "next/server";
import { askBridging, type AskBridgingInput } from "@/lib/tools/askBridging";
import { getCloudflareContext } from "@opennextjs/cloudflare";
import { validateRequest } from "@/lib/auth/validateRequest";
import { checkToolRateLimit } from "@/lib/rateLimit";

export async function POST(request: NextRequest) {
try {
const { env } = getCloudflareContext();
const auth = await validateRequest(request, env.DB, env.AUTH_SECRET);
if (!auth.success) return auth.response;
const rl = await checkToolRateLimit(env.KV, auth);
if ("response" in rl) return rl.response;

if (!env.OPENROUTER_API_KEY) {
return NextResponse.json(
Expand Down Expand Up @@ -36,7 +39,7 @@ export async function POST(request: NextRequest) {
}
);

return NextResponse.json(result);
return NextResponse.json(result, { headers: rl.headers });
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
console.error("Error in askBridging:", message, error);
Expand Down
5 changes: 4 additions & 1 deletion apps/web/src/app/api/v1/tools/ask-orbit/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ import { NextRequest, NextResponse } from "next/server";
import { askOrbit, type AskOrbitInput } from "@/lib/tools/askOrbit";
import { getCloudflareContext } from "@opennextjs/cloudflare";
import { validateRequest } from "@/lib/auth/validateRequest";
import { checkToolRateLimit } from "@/lib/rateLimit";

export async function POST(request: NextRequest) {
try {
const { env } = getCloudflareContext();
const auth = await validateRequest(request, env.DB, env.AUTH_SECRET);
if (!auth.success) return auth.response;
const rl = await checkToolRateLimit(env.KV, auth);
if ("response" in rl) return rl.response;

if (!env.OPENROUTER_API_KEY) {
return NextResponse.json(
Expand Down Expand Up @@ -35,7 +38,7 @@ export async function POST(request: NextRequest) {
}
);

return NextResponse.json(result);
return NextResponse.json(result, { headers: rl.headers });
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
console.error("Error in askOrbit:", message, error);
Expand Down
Loading