triggerdotdev
diff --git a/‎.env.example‎
Lines changed: 7 additions & 1 deletion b/‎.env.example‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎apps/webapp/app/env.server.ts‎
Lines changed: 17 additions & 0 deletions b/‎apps/webapp/app/env.server.ts‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎apps/webapp/app/routes/api.v3.batches.ts‎
Lines changed: 25 additions & 0 deletions b/‎apps/webapp/app/routes/api.v3.batches.ts‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎apps/webapp/app/runEngine/concerns/batchGlobalRateLimiter.server.ts‎
Lines changed: 36 additions & 0 deletions b/‎apps/webapp/app/runEngine/concerns/batchGlobalRateLimiter.server.ts‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎apps/webapp/app/runEngine/concerns/batchLimits.server.ts‎
Lines changed: 116 additions & 0 deletions b/‎apps/webapp/app/runEngine/concerns/batchLimits.server.ts‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎apps/webapp/app/runEngine/services/createBatch.server.ts‎
Lines changed: 21 additions & 3 deletions b/‎apps/webapp/app/runEngine/services/createBatch.server.ts‎
Lines changed: 21 additions & 3 deletions
diff --git a/‎apps/webapp/app/runEngine/services/streamBatchItems.server.ts‎
Lines changed: 2 additions & 1 deletion b/‎apps/webapp/app/runEngine/services/streamBatchItems.server.ts‎
Lines changed: 2 additions & 1 deletion
@@ -85,4 +85,10 @@ POSTHOG_PROJECT_KEY=
 # These control the server-side internal telemetry
 # INTERNAL_OTEL_TRACE_EXPORTER_URL=<URL to send traces to>
 # INTERNAL_OTEL_TRACE_LOGGING_ENABLED=1
-# INTERNAL_OTEL_TRACE_INSTRUMENT_PRISMA_ENABLED=0,
+# INTERNAL_OTEL_TRACE_INSTRUMENT_PRISMA_ENABLED=0
+
+# Enable local observability stack (requires `pnpm run docker` to start otel-collector)
+# Uncomment these to send metrics to the local Prometheus via OTEL Collector:
+# INTERNAL_OTEL_METRIC_EXPORTER_ENABLED=1
+# INTERNAL_OTEL_METRIC_EXPORTER_URL=http://localhost:4318/v1/metrics
+# INTERNAL_OTEL_METRIC_EXPORTER_INTERVAL_MS=15000
@@ -541,6 +541,10 @@ const EnvironmentSchema = z
     // 2-phase batch API settings
     STREAMING_BATCH_MAX_ITEMS: z.coerce.number().int().default(1_000), // Max items in streaming batch
     STREAMING_BATCH_ITEM_MAXIMUM_SIZE: z.coerce.number().int().default(3_145_728),
+    BATCH_RATE_LIMIT_REFILL_RATE: z.coerce.number().int().default(10),
+    BATCH_RATE_LIMIT_MAX: z.coerce.number().int().default(1200),
+    BATCH_RATE_LIMIT_REFILL_INTERVAL: z.string().default("10s"),
+    BATCH_CONCURRENCY_LIMIT_DEFAULT: z.coerce.number().int().default(10),
 
     REALTIME_STREAM_VERSION: z.enum(["v1", "v2"]).default("v1"),
     REALTIME_STREAM_MAX_LENGTH: z.coerce.number().int().default(1000),
@@ -941,6 +945,19 @@ const EnvironmentSchema = z
     BATCH_QUEUE_MAX_DEFICIT: z.coerce.number().int().optional(),
     BATCH_QUEUE_CONSUMER_COUNT: z.coerce.number().int().optional(),
     BATCH_QUEUE_CONSUMER_INTERVAL_MS: z.coerce.number().int().optional(),
+    // Global rate limit: max items processed per second across all consumers
+    // If not set, no global rate limiting is applied
+    BATCH_QUEUE_GLOBAL_RATE_LIMIT: z.coerce.number().int().positive().optional(),
+
+    // Batch rate limits and concurrency by plan type
+    // Rate limit: max items per minute for batch creation
+    BATCH_RATE_LIMIT_FREE: z.coerce.number().int().default(100), // 100 items/min for free
+    BATCH_RATE_LIMIT_PAID: z.coerce.number().int().default(10_000), // 10k items/min for paid
+    BATCH_RATE_LIMIT_ENTERPRISE: z.coerce.number().int().default(100_000), // 100k items/min for enterprise
+    // Processing concurrency: max concurrent batch items being processed
+    BATCH_CONCURRENCY_FREE: z.coerce.number().int().default(1),
+    BATCH_CONCURRENCY_PAID: z.coerce.number().int().default(10),
+    BATCH_CONCURRENCY_ENTERPRISE: z.coerce.number().int().default(50),
 
     ADMIN_WORKER_ENABLED: z.string().default(process.env.WORKER_ENABLED ?? "true"),
     ADMIN_WORKER_CONCURRENCY_WORKERS: z.coerce.number().int().default(2),
 
@@ -2,6 +2,7 @@ import { json } from "@remix-run/server-runtime";
 import { CreateBatchRequestBody, CreateBatchResponse, generateJWT } from "@trigger.dev/core/v3";
 import { prisma } from "~/db.server";
 import { env } from "~/env.server";
+import { BatchRateLimitExceededError } from "~/runEngine/concerns/batchLimits.server";
 import { CreateBatchService } from "~/runEngine/services/createBatch.server";
 import { AuthenticatedEnvironment, getOneTimeUseToken } from "~/services/apiAuth.server";
 import { logger } from "~/services/logger.server";
@@ -154,6 +155,30 @@ const { action, loader } = createActionApiRoute(
         headers: $responseHeaders,
       });
     } catch (error) {
+      if (error instanceof BatchRateLimitExceededError) {
+        logger.info("Batch rate limit exceeded", {
+          limit: error.limit,
+          remaining: error.remaining,
+          resetAt: error.resetAt.toISOString(),
+          itemCount: error.itemCount,
+        });
+        return json(
+          { error: error.message },
+          {
+            status: 429,
+            headers: {
+              "X-RateLimit-Limit": error.limit.toString(),
+              "X-RateLimit-Remaining": error.remaining.toString(),
+              "X-RateLimit-Reset": Math.floor(error.resetAt.getTime() / 1000).toString(),
+              "Retry-After": Math.max(
+                1,
+                Math.ceil((error.resetAt.getTime() - Date.now()) / 1000)
+              ).toString(),
+            },
+          }
+        );
+      }
+
       logger.error("Create batch error", {
         error: {
           message: (error as Error).message,
 
@@ -0,0 +1,36 @@
+import { Ratelimit } from "@upstash/ratelimit";
+import type { GlobalRateLimiter } from "@trigger.dev/redis-worker";
+import { RateLimiter } from "~/services/rateLimiter.server";
+
+/**
+ * Builds the global rate limiter for the batch queue.
+ *
+ * Every call to the returned `limit()` draws from one shared bucket (the fixed
+ * key `"global"` under the `batch-queue-global` prefix), so the cap applies to
+ * all callers together rather than to each caller separately.
+ *
+ * The bucket is a token bucket that:
+ * - is refilled with `itemsPerSecond` tokens every second
+ * - holds at most `itemsPerSecond` tokens (the burst capacity)
+ *
+ * @param itemsPerSecond - Maximum items to process per second
+ * @returns A GlobalRateLimiter whose `limit()` reports whether the call was
+ *   allowed, together with the limiter's reset value
+ */
+export function createBatchGlobalRateLimiter(itemsPerSecond: number): GlobalRateLimiter {
+  // Refill amount and bucket capacity are intentionally the same number.
+  const tokenBucket = Ratelimit.tokenBucket(itemsPerSecond, "1 s", itemsPerSecond);
+
+  const sharedLimiter = new RateLimiter({
+    keyPrefix: "batch-queue-global",
+    limiter: tokenBucket,
+    logSuccess: false,
+    logFailure: true,
+  });
+
+  return {
+    limit: async () => {
+      const { success, reset } = await sharedLimiter.limit("global");
+      return { allowed: success, resetAt: reset };
+    },
+  };
+}
+
@@ -0,0 +1,116 @@
+import { Organization } from "@trigger.dev/database";
+import { Ratelimit } from "@upstash/ratelimit";
+import { z } from "zod";
+import { env } from "~/env.server";
+import { RateLimiterConfig } from "~/services/authorizationRateLimitMiddleware.server";
+import { createRedisRateLimitClient, Duration, RateLimiter } from "~/services/rateLimiter.server";
+
+// Schema for the batch limits value. `processingConcurrency` must be an integer and
+// defaults to env.BATCH_CONCURRENCY_LIMIT_DEFAULT when the field is absent.
+const BatchLimitsConfig = z.object({
+  processingConcurrency: z.number().int().default(env.BATCH_CONCURRENCY_LIMIT_DEFAULT),
+});
+
+/**
+ * Batch limits configuration resolved for an organization
+ */
+export type BatchLimitsConfig = z.infer<typeof BatchLimitsConfig>;
+
+/**
+ * Redis client shared by every organization batch rate limiter in this process.
+ * It is created lazily so nothing is constructed until the first batch request.
+ */
+let sharedBatchRateLimitRedisClient: ReturnType<typeof createRedisRateLimitClient> | undefined;
+
+/**
+ * Returns the shared rate-limit Redis client, building it from the
+ * RATE_LIMIT_REDIS_* environment settings the first time it is requested.
+ *
+ * NOTE(review): if the project has a process-wide singleton helper that
+ * survives dev-server module reloads, prefer it over this module-level cache.
+ */
+function getBatchRateLimitRedisClient(): ReturnType<typeof createRedisRateLimitClient> {
+  if (!sharedBatchRateLimitRedisClient) {
+    sharedBatchRateLimitRedisClient = createRedisRateLimitClient({
+      port: env.RATE_LIMIT_REDIS_PORT,
+      host: env.RATE_LIMIT_REDIS_HOST,
+      username: env.RATE_LIMIT_REDIS_USERNAME,
+      password: env.RATE_LIMIT_REDIS_PASSWORD,
+      tlsDisabled: env.RATE_LIMIT_REDIS_TLS_DISABLED === "true",
+      clusterMode: env.RATE_LIMIT_REDIS_CLUSTER_MODE_ENABLED === "1",
+    });
+  }
+
+  return sharedBatchRateLimitRedisClient;
+}
+
+/**
+ * Builds the batch-creation rate limiter for an organization.
+ *
+ * The algorithm (fixed window, token bucket or sliding window) and its
+ * parameters come from `resolveBatchRateLimitConfig(organization.batchRateLimitConfig)`.
+ * Keys are stored under the `ratelimit:batch` prefix.
+ *
+ * This function runs for every batch creation request, so the Redis client is
+ * reused across calls instead of being constructed each time (constructing one
+ * per request presumably opens a new Redis connection per request — confirm
+ * against `createRedisRateLimitClient`).
+ *
+ * @param organization - Organization whose `batchRateLimitConfig` selects the limiter
+ * @returns A RateLimiter that logs failures but not successes
+ */
+function createOrganizationRateLimiter(organization: Organization): RateLimiter {
+  const limiterConfig = resolveBatchRateLimitConfig(organization.batchRateLimitConfig);
+
+  const limiter =
+    limiterConfig.type === "fixedWindow"
+      ? Ratelimit.fixedWindow(limiterConfig.tokens, limiterConfig.window)
+      : limiterConfig.type === "tokenBucket"
+      ? Ratelimit.tokenBucket(
+          limiterConfig.refillRate,
+          limiterConfig.interval,
+          limiterConfig.maxTokens
+        )
+      : Ratelimit.slidingWindow(limiterConfig.tokens, limiterConfig.window);
+
+  return new RateLimiter({
+    redisClient: getBatchRateLimitRedisClient(),
+    keyPrefix: "ratelimit:batch",
+    limiter,
+    logSuccess: false,
+    logFailure: true,
+  });
+}
+
+/**
+ * Chooses the rate limiter configuration used for batch creation.
+ *
+ * @param batchRateLimitConfig - Raw, untyped override value
+ * @returns The override when it is truthy and validates against `RateLimiterConfig`;
+ *   otherwise a token bucket built from the BATCH_RATE_LIMIT_* env settings
+ */
+function resolveBatchRateLimitConfig(batchRateLimitConfig?: unknown): RateLimiterConfig {
+  const envFallback: RateLimiterConfig = {
+    type: "tokenBucket",
+    refillRate: env.BATCH_RATE_LIMIT_REFILL_RATE,
+    interval: env.BATCH_RATE_LIMIT_REFILL_INTERVAL as Duration,
+    maxTokens: env.BATCH_RATE_LIMIT_MAX,
+  };
+
+  // No override supplied: skip validation entirely.
+  if (!batchRateLimitConfig) {
+    return envFallback;
+  }
+
+  const override = RateLimiterConfig.safeParse(batchRateLimitConfig);
+
+  // An override that fails validation is ignored rather than rejected.
+  return override.success ? override.data : envFallback;
+}
+
+/**
+ * Builds the batch limits that apply to an organization.
+ *
+ * Both values are derived from fields on the organization record:
+ * `batchRateLimitConfig` selects the rate limiter and
+ * `batchQueueConcurrencyConfig` selects the processing concurrency. Each falls
+ * back to an env-based default when the field is missing or invalid.
+ *
+ * @param organization - Organization whose limits are being resolved
+ * @returns The rate limiter for batch creation and the resolved limits config
+ */
+export async function getBatchLimits(
+  organization: Organization
+): Promise<{ rateLimiter: RateLimiter; config: BatchLimitsConfig }> {
+  return {
+    rateLimiter: createOrganizationRateLimiter(organization),
+    config: resolveBatchLimitsConfig(organization.batchQueueConcurrencyConfig),
+  };
+}
+
+/**
+ * Chooses the batch limits configuration.
+ *
+ * @param batchLimitsConfig - Raw, untyped override value
+ * @returns The override when it is truthy and validates against `BatchLimitsConfig`;
+ *   otherwise `{ processingConcurrency: env.BATCH_CONCURRENCY_LIMIT_DEFAULT }`
+ */
+function resolveBatchLimitsConfig(batchLimitsConfig?: unknown): BatchLimitsConfig {
+  const envFallback: BatchLimitsConfig = {
+    processingConcurrency: env.BATCH_CONCURRENCY_LIMIT_DEFAULT,
+  };
+
+  // No override supplied: skip validation entirely.
+  if (!batchLimitsConfig) {
+    return envFallback;
+  }
+
+  const override = BatchLimitsConfig.safeParse(batchLimitsConfig);
+
+  // An override that fails validation is ignored rather than rejected.
+  return override.success ? override.data : envFallback;
+}
+
+/**
+ * Raised when a batch submission is refused by the rate limiter.
+ * Carries the values a caller needs to build a 429 response: the limit,
+ * what is left of it, when it resets, and how many items were attempted.
+ */
+export class BatchRateLimitExceededError extends Error {
+  public readonly limit: number;
+  public readonly remaining: number;
+  public readonly resetAt: Date;
+  public readonly itemCount: number;
+
+  constructor(limit: number, remaining: number, resetAt: Date, itemCount: number) {
+    super(
+      `Batch rate limit exceeded. Attempted to submit ${itemCount} items but only ${remaining} remaining. Limit resets at ${resetAt.toISOString()}`
+    );
+
+    this.limit = limit;
+    this.remaining = remaining;
+    this.resetAt = resetAt;
+    this.itemCount = itemCount;
+    this.name = "BatchRateLimitExceededError";
+  }
+}
@@ -9,6 +9,7 @@ import { logger } from "~/services/logger.server";
 import { DefaultQueueManager } from "../concerns/queues.server";
 import { DefaultTriggerTaskValidator } from "../validators/triggerTaskValidator";
 import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseService.server";
+import { BatchRateLimitExceededError, getBatchLimits } from "../concerns/batchLimits.server";
 
 export type CreateBatchServiceOptions = {
   triggerVersion?: string;
@@ -70,7 +71,21 @@ export class CreateBatchService extends WithRunEngine {
             throw entitlementValidation.error;
           }
 
-          const planType = entitlementValidation.plan?.type;
+          // Get batch limits for this organization
+          const { config, rateLimiter } = await getBatchLimits(environment.organization);
+
+          // Check rate limit BEFORE creating the batch
+          // This prevents burst creation of batches that exceed the rate limit
+          const rateResult = await rateLimiter.limit(environment.id, body.runCount);
+
+          if (!rateResult.success) {
+            throw new BatchRateLimitExceededError(
+              rateResult.limit,
+              rateResult.remaining,
+              new Date(rateResult.reset),
+              body.runCount
+            );
+          }
 
           // Validate queue limits for the expected batch size
           const queueSizeGuard = await this.queueConcern.validateQueueLimits(
@@ -132,16 +147,19 @@ export class CreateBatchService extends WithRunEngine {
             spanParentAsLink: options.spanParentAsLink,
             realtimeStreamsVersion: options.realtimeStreamsVersion,
             idempotencyKey: body.idempotencyKey,
-            planType,
+            processingConcurrency: config.processingConcurrency,
           };
 
           await this._engine.initializeBatch(initOptions);
 
-          logger.debug("Batch created for streaming", {
+          logger.info("Batch created", {
             batchId: friendlyId,
             runCount: body.runCount,
             envId: environment.id,
+            projectId: environment.projectId,
             parentRunId: body.parentRunId,
+            resumeParentOnCompletion: body.resumeParentOnCompletion,
+            processingConcurrency: config.processingConcurrency,
           });
 
           return {
 
@@ -180,11 +180,12 @@ export class StreamBatchItemsService extends WithRunEngine {
           },
         });
 
-        logger.debug("Batch sealed after streaming items", {
+        logger.info("Batch sealed and ready for processing", {
           batchId: batchFriendlyId,
           itemsAccepted,
           itemsDeduplicated,
           totalEnqueued: enqueuedCount,
+          envId: environment.id,
         });
 
         span.setAttribute("itemsAccepted", itemsAccepted);