diff --git a/apps/backend/src/lib/supabase/database.types.ts b/apps/backend/src/lib/supabase/database.types.ts index 7d1dabf..1db7c82 100644 --- a/apps/backend/src/lib/supabase/database.types.ts +++ b/apps/backend/src/lib/supabase/database.types.ts @@ -176,6 +176,47 @@ export interface Database { created_at?: string; }; }; + deployment_updates: { + Row: { + id: string; + deployment_id: string; + user_id: string; + new_customization_config: Json; + previous_state: Json | null; + status: 'pending' | 'validating' | 'generating' | 'updating_repo' | 'redeploying' | 'completed' | 'rolled_back' | 'failed'; + canary_percent: number; + error_message: string | null; + created_at: string; + updated_at: string; + completed_at: string | null; + }; + Insert: { + id?: string; + deployment_id: string; + user_id: string; + new_customization_config: Json; + previous_state?: Json | null; + status?: 'pending' | 'validating' | 'generating' | 'updating_repo' | 'redeploying' | 'completed' | 'rolled_back' | 'failed'; + canary_percent?: number; + error_message?: string | null; + created_at?: string; + updated_at?: string; + completed_at?: string | null; + }; + Update: { + id?: string; + deployment_id?: string; + user_id?: string; + new_customization_config?: Json; + previous_state?: Json | null; + status?: 'pending' | 'validating' | 'generating' | 'updating_repo' | 'redeploying' | 'completed' | 'rolled_back' | 'failed'; + canary_percent?: number; + error_message?: string | null; + created_at?: string; + updated_at?: string; + completed_at?: string | null; + }; + }; customization_drafts: { Row: { id: string; diff --git a/apps/backend/src/services/deployment-update.service.test.ts b/apps/backend/src/services/deployment-update.service.test.ts index 1b3ce02..89d676c 100644 --- a/apps/backend/src/services/deployment-update.service.test.ts +++ b/apps/backend/src/services/deployment-update.service.test.ts @@ -1,15 +1,13 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { DeploymentUpdateService } from './deployment-update.service'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { CustomizationConfig } from '@craft/types'; import { createClient } from '@/lib/supabase/server'; +import { DeploymentUpdateService } from './deployment-update.service'; import { githubPushService } from './github-push.service'; -import type { CustomizationConfig } from '@craft/types'; -// Mock Supabase vi.mock('@/lib/supabase/server', () => ({ createClient: vi.fn(), })); -// Mock GitHub Push Service vi.mock('./github-push.service', () => ({ githubPushService: { pushGeneratedCode: vi.fn(), @@ -19,6 +17,8 @@ vi.mock('./github-push.service', () => ({ describe('DeploymentUpdateService', () => { let service: DeploymentUpdateService; let mockSupabase: any; + let mockVercelService: any; + let mockRolloutMonitor: any; const mockDeploymentId = 'test-deployment-id'; const mockUserId = 'test-user-id'; @@ -43,20 +43,32 @@ describe('DeploymentUpdateService', () => { }, }; - const mockPreviousState = { + const mockDeploymentRow = { + name: 'test-app', customization_config: { ...mockConfig, branding: { ...mockConfig.branding, appName: 'Old App' } }, deployment_url: 'https://old-app.vercel.app', + vercel_project_id: 'vercel-project-id', vercel_deployment_id: 'old-vercel-id', + custom_domain: 'app.example.com', + repository_url: 'https://github.com/acme/test-app', status: 'completed', }; + const mockPreviousState = { + name: mockDeploymentRow.name, + customizationConfig: mockDeploymentRow.customization_config, + deploymentUrl: mockDeploymentRow.deployment_url, + vercelProjectId: mockDeploymentRow.vercel_project_id, + vercelDeploymentId: mockDeploymentRow.vercel_deployment_id, + customDomain: mockDeploymentRow.custom_domain, + repositoryUrl: mockDeploymentRow.repository_url, + status: mockDeploymentRow.status, + }; + beforeEach(() => { vi.clearAllMocks(); - - // Mock crypto.randomUUID vi.spyOn(crypto, 'randomUUID').mockReturnValue(mockUpdateId as `${string}-${string}-${string}-${string}-${string}`); - // Setup mock Supabase client mockSupabase = { from: vi.fn().mockReturnThis(), select: vi.fn().mockReturnThis(), @@ -67,20 +79,48 @@ describe('DeploymentUpdateService', () => { order: vi.fn().mockReturnThis(), }; + mockVercelService = { + triggerDeployment: vi.fn().mockResolvedValue({ + deploymentId: 'new-vercel-id', + deploymentUrl: 'https://new-app.vercel.app', + status: 'READY', + }), + getDeploymentStatus: vi.fn().mockResolvedValue({ + status: 'ready', + url: 'https://new-app.vercel.app', + deploymentId: 'new-vercel-id', + createdAt: new Date(), + }), + listDeploymentAliases: vi.fn().mockResolvedValue([ + { uid: 'alias-1', alias: 'app.example.com' }, + ]), + assignAliasToDeployment: vi.fn().mockResolvedValue({ + uid: 'alias-1', + alias: 'app.example.com', + }), + }; + + mockRolloutMonitor = { + getCandidateMetrics: vi.fn().mockResolvedValue({ + errorRate: 0.001, + p99LatencyMs: 120, + }), + }; + (createClient as any).mockReturnValue(mockSupabase); - service = new DeploymentUpdateService(githubPushService as any); + service = new DeploymentUpdateService( + githubPushService as any, + mockVercelService, + mockRolloutMonitor, + ); - // Reset the global failure flag (globalThis as any).__DEPLOYMENT_UPDATE_SHOULD_FAIL = false; + (globalThis as any).__DEPLOYMENT_UPDATE_MANUAL_ROLLBACK = false; }); - it('should successfully update a deployment', async () => { - // Step 1: Mock getDeploymentState - mockSupabase.single.mockResolvedValueOnce({ data: mockPreviousState, error: null }); - - // Step 4: Mock finalizeUpdate and markUpdateCompleted - mockSupabase.single.mockResolvedValueOnce({ data: { previous_state: mockPreviousState }, error: null }); + it('successfully updates a deployment via blue-green promotion', async () => { + mockSupabase.single.mockResolvedValueOnce({ data: mockDeploymentRow, error: null }); const result = await service.updateDeployment({ deploymentId: mockDeploymentId, @@ -90,25 +130,28 @@ describe('DeploymentUpdateService', () => { expect(result.success).toBe(true); expect(result.rolledBack).toBe(false); - expect(result.deploymentUrl).toBe(mockPreviousState.deployment_url); - - // Verify Supabase calls - expect(mockSupabase.from).toHaveBeenCalledWith('deployments'); - expect(mockSupabase.from).toHaveBeenCalledWith('deployment_updates'); - - // Verify state progression logs - const statusUpdates = mockSupabase.update.mock.calls - .filter((call: any) => call[0].status) - .map((call: any) => call[0].status); - - expect(statusUpdates).toContain('validating'); - expect(statusUpdates).toContain('generating'); - expect(statusUpdates).toContain('updating_repo'); - expect(statusUpdates).toContain('redeploying'); - expect(statusUpdates).toContain('completed'); + expect(result.deploymentUrl).toBe('https://new-app.vercel.app'); + expect(mockVercelService.triggerDeployment).toHaveBeenCalledWith( + 'vercel-project-id', + 'acme/test-app', + ); + expect(mockVercelService.assignAliasToDeployment).toHaveBeenCalledWith( + 'new-vercel-id', + 'app.example.com', + ); + + const canaryUpdates = mockSupabase.update.mock.calls + .map((call: any[]) => call[0]) + .filter((payload: any) => typeof payload.canary_percent === 'number') + .map((payload: any) => payload.canary_percent); + + expect(canaryUpdates).toEqual(expect.arrayContaining([0, 5, 25, 50, 100])); + expect(mockSupabase.insert).toHaveBeenCalledWith(expect.objectContaining({ + canary_percent: 0, + })); }); - it('should fail if deployment is not found', async () => { + it('fails if deployment is not found', async () => { mockSupabase.single.mockResolvedValueOnce({ data: null, error: new Error('Not found') }); const result = await service.updateDeployment({ @@ -121,10 +164,10 @@ describe('DeploymentUpdateService', () => { expect(result.errorMessage).toBe('Deployment not found or access denied'); }); - it('should fail if deployment is not in "completed" state', async () => { - mockSupabase.single.mockResolvedValueOnce({ - data: { ...mockPreviousState, status: 'pending' }, - error: null + it('fails if deployment is not in completed state', async () => { + mockSupabase.single.mockResolvedValueOnce({ + data: { ...mockDeploymentRow, status: 'pending' }, + error: null, }); const result = await service.updateDeployment({ @@ -137,25 +180,15 @@ describe('DeploymentUpdateService', () => { expect(result.errorMessage).toBe("Cannot update deployment in 'pending' state"); }); - it('should fail validation if appName is missing', async () => { - mockSupabase.single.mockResolvedValueOnce({ data: mockPreviousState, error: null }); - - // Rollback path: fetch previous_state from deployment_updates - mockSupabase.single.mockResolvedValueOnce({ - data: { - previous_state: { - customizationConfig: mockPreviousState.customization_config, - deploymentUrl: mockPreviousState.deployment_url, - vercelDeploymentId: mockPreviousState.vercel_deployment_id, - status: mockPreviousState.status, - repositoryUrl: null, - }, - }, - error: null, - }); + it('fails validation if appName is missing and rolls back', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ + data: { previous_state: mockPreviousState }, + error: null, + }); const invalidConfig = { ...mockConfig, branding: { ...mockConfig.branding, appName: '' } }; - const result = await service.updateDeployment({ deploymentId: mockDeploymentId, userId: mockUserId, @@ -167,25 +200,13 @@ describe('DeploymentUpdateService', () => { expect(result.errorMessage).toBe('Invalid configuration: appName is required'); }); - it('should rollback if update pipeline fails', async () => { - // Step 1: Mock getDeploymentState - mockSupabase.single.mockResolvedValueOnce({ data: mockPreviousState, error: null }); - - // Mock rollback state fetch — previous_state is stored with camelCase keys (DeploymentState) - mockSupabase.single.mockResolvedValueOnce({ - data: { - previous_state: { - customizationConfig: mockPreviousState.customization_config, - deploymentUrl: mockPreviousState.deployment_url, - vercelDeploymentId: mockPreviousState.vercel_deployment_id, - status: mockPreviousState.status, - repositoryUrl: null, - }, - }, - error: null - }); - - // Trigger pipeline failure + it('rolls back if the pipeline fails before rollout', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ + data: { previous_state: mockPreviousState }, + error: null, + }); (globalThis as any).__DEPLOYMENT_UPDATE_SHOULD_FAIL = true; const result = await service.updateDeployment({ @@ -197,20 +218,16 @@ describe('DeploymentUpdateService', () => { expect(result.success).toBe(false); expect(result.rolledBack).toBe(true); expect(result.errorMessage).toBe('Update pipeline failed'); - - // Verify rollback happened - should restore previous state expect(mockSupabase.update).toHaveBeenCalledWith(expect.objectContaining({ - customization_config: mockPreviousState.customization_config, - status: 'completed' + customization_config: mockPreviousState.customizationConfig, + status: 'completed', })); }); - it('should handle rollback failure gracefully', async () => { - mockSupabase.single.mockResolvedValueOnce({ data: mockPreviousState, error: null }); - - // Mock rollback state fetch to fail — no previous_state found - mockSupabase.single.mockResolvedValueOnce({ data: null, error: new Error('DB error') }); - + it('handles rollback failure gracefully', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ data: null, error: new Error('DB error') }); (globalThis as any).__DEPLOYMENT_UPDATE_SHOULD_FAIL = true; const result = await service.updateDeployment({ @@ -221,13 +238,12 @@ describe('DeploymentUpdateService', () => { expect(result.success).toBe(false); expect(result.rolledBack).toBe(false); - // When previous_state is not found, rollback returns false without marking as failed expect(result.errorMessage).toBe('Update pipeline failed'); }); - it('should successfully push to GitHub if githubPush is provided', async () => { - mockSupabase.single.mockResolvedValueOnce({ data: mockPreviousState, error: null }); - + it('successfully pushes to GitHub when githubPush is provided', async () => { + mockSupabase.single.mockResolvedValueOnce({ data: mockDeploymentRow, error: null }); + const mockCommitRef = { sha: 'test-sha', url: 'https://github.com/test' }; (githubPushService.pushGeneratedCode as any).mockResolvedValue(mockCommitRef); @@ -241,11 +257,91 @@ describe('DeploymentUpdateService', () => { token: 'token', branch: 'main', generatedFiles: [], - } + }, }); expect(result.success).toBe(true); expect(githubPushService.pushGeneratedCode).toHaveBeenCalled(); expect(result.commitRef).toEqual(mockCommitRef); + expect(mockVercelService.triggerDeployment).toHaveBeenCalledWith('vercel-project-id', 'owner/repo'); + }); + + it('auto-rolls back on candidate error-rate spike', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ + data: { previous_state: mockPreviousState }, + error: null, + }); + mockRolloutMonitor.getCandidateMetrics.mockResolvedValueOnce({ + errorRate: 0.08, + p99LatencyMs: 140, + }); + + const result = await service.updateDeployment({ + deploymentId: mockDeploymentId, + userId: mockUserId, + customizationConfig: mockConfig, + }); + + expect(result.success).toBe(false); + expect(result.rolledBack).toBe(true); + expect(result.errorMessage).toContain('Automatic rollback triggered'); + expect(mockVercelService.assignAliasToDeployment).not.toHaveBeenCalled(); + }); + + it('supports manual rollback during rollout monitoring', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ + data: { previous_state: mockPreviousState }, + error: null, + }); + mockRolloutMonitor.getCandidateMetrics.mockResolvedValueOnce({ + errorRate: 0.001, + p99LatencyMs: 120, + forceRollback: true, + }); + + const result = await service.updateDeployment({ + deploymentId: mockDeploymentId, + userId: mockUserId, + customizationConfig: mockConfig, + }); + + expect(result.success).toBe(false); + expect(result.rolledBack).toBe(true); + expect(result.errorMessage).toContain('Manual rollback requested'); + }); + + it('reverts aliases to the previous deployment if alias switch fails mid-promotion', async () => { + mockSupabase.single + .mockResolvedValueOnce({ data: mockDeploymentRow, error: null }) + .mockResolvedValueOnce({ + data: { previous_state: mockPreviousState }, + error: null, + }); + mockVercelService.listDeploymentAliases.mockResolvedValue([ + { uid: 'alias-1', alias: 'app.example.com' }, + { uid: 'alias-2', alias: 'api.example.com' }, + ]); + mockVercelService.assignAliasToDeployment + .mockResolvedValueOnce({ uid: 'alias-1', alias: 'app.example.com' }) + .mockRejectedValueOnce(new Error('Vercel alias update failed')) + .mockResolvedValueOnce({ uid: 'alias-1', alias: 'app.example.com' }); + + const result = await service.updateDeployment({ + deploymentId: mockDeploymentId, + userId: mockUserId, + customizationConfig: mockConfig, + }); + + expect(result.success).toBe(false); + expect(result.rolledBack).toBe(true); + expect(mockVercelService.assignAliasToDeployment).toHaveBeenNthCalledWith( + 3, + 'old-vercel-id', + 'app.example.com', + ); }); }); diff --git a/apps/backend/src/services/deployment-update.service.ts b/apps/backend/src/services/deployment-update.service.ts index f83db8b..3490280 100644 --- a/apps/backend/src/services/deployment-update.service.ts +++ b/apps/backend/src/services/deployment-update.service.ts @@ -2,7 +2,7 @@ * DeploymentUpdateService * * Handles deployment updates with rollback on failure. - * + * * Property 38 (design.md): Failed updates must NOT replace the last known good deployment. * When an update fails at any stage, the deployment must rollback to the previous * successful state, preserving: @@ -14,14 +14,25 @@ */ import { createClient } from '@/lib/supabase/server'; -import type { DeploymentStatusType, CustomizationConfig } from '@craft/types'; -import type { GeneratedFile } from '@craft/types'; +import type { CustomizationConfig, DeploymentStatusType, GeneratedFile } from '@craft/types'; import { githubPushService, type GitHubCommitReference, type GitHubPushService, } from './github-push.service'; import { parseRepoIdentity } from './github-repository-update.service'; +import { + BlueGreenSwitcher, + DEFAULT_CANARY_STEPS, + RolloutEngine, + type DeploymentVersion, +} from './rollout-strategy.service'; +import { + VercelService, + type NormalizedDeploymentStatus, + type TriggerDeploymentResult, + type VercelAlias, +} from './vercel.service'; export interface DeploymentUpdate { id: string; @@ -29,13 +40,14 @@ export interface DeploymentUpdate { userId: string; newCustomizationConfig: CustomizationConfig; status: DeploymentUpdateStatus; + canaryPercent: number; previousState: DeploymentState | null; errorMessage?: string; createdAt: Date; completedAt?: Date; } -export type DeploymentUpdateStatus = +export type DeploymentUpdateStatus = | 'pending' | 'validating' | 'generating' @@ -46,9 +58,12 @@ export type DeploymentUpdateStatus = | 'failed'; export interface DeploymentState { + name: string; customizationConfig: CustomizationConfig; deploymentUrl: string | null; + vercelProjectId: string | null; vercelDeploymentId: string | null; + customDomain: string | null; status: DeploymentStatusType; repositoryUrl: string | null; } @@ -82,11 +97,71 @@ export interface UpdateDeploymentResult { interface PipelineExecutionResult { success: boolean; commitRef?: GitHubCommitReference; + deploymentUrl?: string; + vercelDeploymentId?: string; + canaryPercent: number; + rollbackReason?: string; +} + +export interface RolloutMetrics { + errorRate: number; + p99LatencyMs: number; + forceRollback?: boolean; +} + +export interface RolloutMonitorContext { + updateId: string; + deploymentId: string; + candidateDeploymentId: string; + candidateDeploymentUrl: string; + canaryPercent: number; +} + +export interface RolloutMonitor { + getCandidateMetrics(context: RolloutMonitorContext): Promise; +} + +interface DeploymentUpdateVercelClient { + triggerDeployment(projectId: string, gitRepo: string): Promise; + getDeploymentStatus(deploymentId: string): Promise; + listDeploymentAliases(deploymentId: string): Promise; + assignAliasToDeployment(deploymentId: string, alias: string): Promise; +} + +class HttpRolloutMonitor implements RolloutMonitor { + async getCandidateMetrics(context: RolloutMonitorContext): Promise { + const injected = (globalThis as any).__DEPLOYMENT_UPDATE_ROLLOUT_METRICS; + if (typeof injected === 'function') { + return injected(context); + } + + const startedAt = Date.now(); + try { + const response = await fetch(context.candidateDeploymentUrl, { + method: 'HEAD', + signal: AbortSignal.timeout(10_000), + }); + + return { + errorRate: response.ok ? 0 : 1, + p99LatencyMs: Date.now() - startedAt, + forceRollback: (globalThis as any).__DEPLOYMENT_UPDATE_MANUAL_ROLLBACK === true, + }; + } catch { + return { + errorRate: 1, + p99LatencyMs: 10_000, + forceRollback: (globalThis as any).__DEPLOYMENT_UPDATE_MANUAL_ROLLBACK === true, + }; + } + } } export class DeploymentUpdateService { constructor( - private readonly _githubPushService: Pick = githubPushService + private readonly _githubPushService: Pick = githubPushService, + private readonly _vercelService: DeploymentUpdateVercelClient = new VercelService(), + private readonly _rolloutMonitor: RolloutMonitor = new HttpRolloutMonitor(), ) {} /** @@ -94,16 +169,12 @@ export class DeploymentUpdateService { * If the update fails, automatically rollback to the previous good state. */ async updateDeployment(request: UpdateDeploymentRequest): Promise { - const supabase = createClient(); const { deploymentId, userId, customizationConfig, githubPush } = request; - - // Create update record const updateId = crypto.randomUUID(); - + try { - // Step 1: Get current deployment state (the "last known good" state) const previousState = await this.getDeploymentState(deploymentId, userId); - + if (!previousState) { return { success: false, @@ -113,7 +184,6 @@ export class DeploymentUpdateService { }; } - // Verify deployment is in completed state (can only update completed deployments) if (previousState.status !== 'completed') { return { success: false, @@ -123,38 +193,34 @@ export class DeploymentUpdateService { }; } - // Create update record with previous state await this.createUpdateRecord(updateId, deploymentId, userId, customizationConfig, previousState); - - // Step 2: Validate the new configuration await this.validateUpdate(updateId, customizationConfig); - // Step 3: Simulate update pipeline (in real implementation, this would: - // - Generate new code - // - Update repository - // - Trigger Vercel redeployment - const pipeline = await this.executeUpdatePipeline(updateId, customizationConfig, githubPush, previousState); + const pipeline = await this.executeUpdatePipeline( + updateId, + deploymentId, + customizationConfig, + githubPush, + previousState, + ); if (!pipeline.success) { - throw new Error('Update pipeline failed'); + throw new Error(pipeline.rollbackReason || 'Update pipeline failed'); } - // Step 4: Update deployment with new config - await this.finalizeUpdate(deploymentId, customizationConfig); + await this.finalizeUpdate(deploymentId, customizationConfig, pipeline); await this.markUpdateCompleted(updateId); return { success: true, deploymentId, rolledBack: false, - deploymentUrl: previousState.deploymentUrl ?? undefined, + deploymentUrl: pipeline.deploymentUrl ?? previousState.deploymentUrl ?? undefined, commitRef: pipeline.commitRef, }; - } catch (error: any) { console.error('Deployment update failed, initiating rollback:', error); - // Step 5: Rollback to previous state const rollbackSuccess = await this.rollbackUpdate(updateId, deploymentId); return { @@ -166,18 +232,15 @@ export class DeploymentUpdateService { } } - /** - * Get the current state of a deployment - */ private async getDeploymentState( deploymentId: string, - userId: string + userId: string, ): Promise { const supabase = createClient(); const { data: deployment, error } = await supabase .from('deployments') - .select('customization_config, deployment_url, vercel_deployment_id, status, repository_url') + .select('name, customization_config, deployment_url, vercel_project_id, vercel_deployment_id, custom_domain, status, repository_url') .eq('id', deploymentId) .eq('user_id', userId) .single(); @@ -187,23 +250,23 @@ export class DeploymentUpdateService { } return { + name: deployment.name, customizationConfig: deployment.customization_config as CustomizationConfig, deploymentUrl: deployment.deployment_url, + vercelProjectId: deployment.vercel_project_id ?? null, vercelDeploymentId: deployment.vercel_deployment_id, + customDomain: deployment.custom_domain ?? null, status: deployment.status as DeploymentStatusType, repositoryUrl: deployment.repository_url ?? null, }; } - /** - * Create an update record for tracking - */ private async createUpdateRecord( updateId: string, deploymentId: string, userId: string, newConfig: CustomizationConfig, - previousState: DeploymentState + previousState: DeploymentState, ): Promise { const supabase = createClient(); @@ -214,21 +277,17 @@ export class DeploymentUpdateService { new_customization_config: newConfig, previous_state: previousState, status: 'pending', + canary_percent: 0, created_at: new Date().toISOString(), }); } - /** - * Validate the new customization configuration - */ private async validateUpdate( updateId: string, - config: CustomizationConfig + config: CustomizationConfig, ): Promise { - // Update status - await this.updateUpdateStatus(updateId, 'validating'); + await this.updateUpdateStatus(updateId, 'validating', { canaryPercent: 0 }); - // Basic validation if (!config.branding?.appName || config.branding.appName.length === 0) { throw new Error('Invalid configuration: appName is required'); } @@ -238,25 +297,23 @@ export class DeploymentUpdateService { } } - /** - * Execute the update pipeline (simulated for testing) - * This can be configured to fail for property testing - */ private async executeUpdatePipeline( updateId: string, + deploymentId: string, config: CustomizationConfig, githubPush?: UpdateDeploymentRequest['githubPush'], - previousState?: DeploymentState + previousState?: DeploymentState, ): Promise { - await this.updateUpdateStatus(updateId, 'generating'); - - // Simulate code generation + await this.updateUpdateStatus(updateId, 'generating', { canaryPercent: 0 }); await this.simulateWork(); - await this.updateUpdateStatus(updateId, 'updating_repo'); + await this.updateUpdateStatus(updateId, 'updating_repo', { canaryPercent: 0 }); let commitRef: GitHubCommitReference | undefined; + let repoFullName: string | undefined; + if (githubPush) { + repoFullName = `${githubPush.owner}/${githubPush.repo}`; commitRef = await this._githubPushService.pushGeneratedCode({ owner: githubPush.owner, repo: githubPush.repo, @@ -271,8 +328,8 @@ export class DeploymentUpdateService { authorEmail: githubPush.authorEmail, }); } else if (previousState?.repositoryUrl) { - // Auto-resolve owner/repo from the stored repository URL (reuse logic) const { owner, repo } = parseRepoIdentity(previousState.repositoryUrl); + repoFullName = `${owner}/${repo}`; const token = process.env.GITHUB_TOKEN ?? ''; commitRef = await this._githubPushService.pushGeneratedCode({ owner, @@ -283,32 +340,122 @@ export class DeploymentUpdateService { commitMessage: `chore: update generated workspace (${new Date().toISOString()})`, }); } else { - // Preserve simulated behavior for callers that do not opt into GitHub push. await this.simulateWork(); } - await this.updateUpdateStatus(updateId, 'redeploying'); - - // Simulate Vercel redeployment - await this.simulateWork(); + await this.updateUpdateStatus(updateId, 'redeploying', { canaryPercent: 0 }); - // For property testing, we use a global flag to simulate failures - // In production, this would be actual pipeline logic - const shouldFail = (global as any).__DEPLOYMENT_UPDATE_SHOULD_FAIL === true; - + const shouldFail = (globalThis as any).__DEPLOYMENT_UPDATE_SHOULD_FAIL === true; if (shouldFail) { - return { success: false, commitRef }; + return { + success: false, + commitRef, + canaryPercent: 0, + rollbackReason: 'Update pipeline failed', + }; + } + + if (!previousState?.vercelProjectId || !repoFullName) { + await this.simulateWork(); + return { + success: true, + commitRef, + deploymentUrl: previousState?.deploymentUrl ?? undefined, + vercelDeploymentId: previousState?.vercelDeploymentId ?? undefined, + canaryPercent: 0, + }; + } + + const candidate = await this._vercelService.triggerDeployment( + previousState.vercelProjectId, + repoFullName, + ); + const candidateStatus = await this._vercelService.getDeploymentStatus(candidate.deploymentId); + if (candidateStatus.status === 'failed' || candidateStatus.status === 'canceled') { + return { + success: false, + commitRef, + deploymentUrl: candidate.deploymentUrl, + vercelDeploymentId: candidate.deploymentId, + canaryPercent: 0, + rollbackReason: 'Candidate deployment did not become ready', + }; } - return { success: true, commitRef }; + const stableVersion: DeploymentVersion = { + id: previousState.vercelDeploymentId ?? 'stable', + errorRate: 0, + p99LatencyMs: 0, + }; + const candidateVersion: DeploymentVersion = { + id: candidate.deploymentId, + errorRate: 0, + p99LatencyMs: 0, + }; + const rollout = new RolloutEngine(stableVersion, candidateVersion); + + for (const canaryPercent of DEFAULT_CANARY_STEPS) { + rollout.setTrafficPercent(canaryPercent); + await this.updateUpdateStatus(updateId, 'redeploying', { canaryPercent }); + + const metrics = await this._rolloutMonitor.getCandidateMetrics({ + updateId, + deploymentId, + candidateDeploymentId: candidate.deploymentId, + candidateDeploymentUrl: candidate.deploymentUrl, + canaryPercent, + }); + + candidateVersion.errorRate = metrics.errorRate; + candidateVersion.p99LatencyMs = metrics.p99LatencyMs; + + if (metrics.forceRollback || rollout.evaluateAndMaybeRollback()) { + return { + success: false, + commitRef, + deploymentUrl: candidate.deploymentUrl, + vercelDeploymentId: candidate.deploymentId, + canaryPercent: 0, + rollbackReason: metrics.forceRollback + ? 'Manual rollback requested during rollout' + : 'Automatic rollback triggered during rollout', + }; + } + } + + const switcher = new BlueGreenSwitcher(stableVersion, candidateVersion, 'blue'); + if (!switcher.switchToStandby()) { + return { + success: false, + commitRef, + deploymentUrl: candidate.deploymentUrl, + vercelDeploymentId: candidate.deploymentId, + canaryPercent: 0, + rollbackReason: 'Candidate failed blue-green promotion health gate', + }; + } + + const aliases = await this.getPromotionAliases(previousState); + if (previousState.vercelDeploymentId && aliases.length > 0) { + await this.switchAliasesWithRollback(previousState.vercelDeploymentId, candidate.deploymentId, aliases); + } + + rollout.promote(); + await this.updateUpdateStatus(updateId, 'redeploying', { canaryPercent: 100 }); + + return { + success: true, + commitRef, + deploymentUrl: candidate.deploymentUrl, + vercelDeploymentId: candidate.deploymentId, + canaryPercent: 100, + }; } - /** - * Finalize the update by updating the deployment record - */ private async finalizeUpdate( deploymentId: string, - config: CustomizationConfig + config: CustomizationConfig, + pipeline: PipelineExecutionResult, ): Promise { const supabase = createClient(); @@ -316,23 +463,21 @@ export class DeploymentUpdateService { .from('deployments') .update({ customization_config: config, + deployment_url: pipeline.deploymentUrl, + vercel_deployment_id: pipeline.vercelDeploymentId, status: 'completed', updated_at: new Date().toISOString(), }) .eq('id', deploymentId); } - /** - * Rollback to the previous deployment state - */ private async rollbackUpdate( updateId: string, - deploymentId: string + deploymentId: string, ): Promise { try { const supabase = createClient(); - // Get the previous state from the update record const { data: updateRecord } = await supabase .from('deployment_updates') .select('previous_state') @@ -346,37 +491,33 @@ export class DeploymentUpdateService { const previousState = updateRecord.previous_state as DeploymentState; - // Restore the deployment to its previous state await supabase .from('deployments') .update({ customization_config: previousState.customizationConfig, deployment_url: previousState.deploymentUrl, vercel_deployment_id: previousState.vercelDeploymentId, - status: 'completed', // Ensure it's back to completed state - error_message: null, // Clear any error messages + status: 'completed', + error_message: null, updated_at: new Date().toISOString(), }) .eq('id', deploymentId); - // Mark the update as rolled back - await this.updateUpdateStatus(updateId, 'rolled_back'); + await this.updateUpdateStatus(updateId, 'rolled_back', { canaryPercent: 0 }); console.log(`Successfully rolled back deployment ${deploymentId}`); return true; } catch (error: any) { console.error('Rollback failed:', error); - await this.updateUpdateStatus(updateId, 'failed'); + await this.updateUpdateStatus(updateId, 'failed', { canaryPercent: 0 }); return false; } } - /** - * Update the status of an update record - */ private async updateUpdateStatus( updateId: string, - status: DeploymentUpdateStatus + status: DeploymentUpdateStatus, + options: { canaryPercent?: number; errorMessage?: string } = {}, ): Promise { const supabase = createClient(); @@ -384,14 +525,13 @@ export class DeploymentUpdateService { .from('deployment_updates') .update({ status, + ...(options.canaryPercent !== undefined ? { canary_percent: options.canaryPercent } : {}), + ...(options.errorMessage !== undefined ? { error_message: options.errorMessage } : {}), updated_at: new Date().toISOString(), }) .eq('id', updateId); } - /** - * Mark an update as completed - */ private async markUpdateCompleted(updateId: string): Promise { const supabase = createClient(); @@ -399,28 +539,63 @@ export class DeploymentUpdateService { .from('deployment_updates') .update({ status: 'completed', + canary_percent: 100, completed_at: new Date().toISOString(), updated_at: new Date().toISOString(), }) .eq('id', updateId); } - /** - * Simulate async work (for pipeline simulation) - */ + private async getPromotionAliases(previousState: DeploymentState): Promise { + const aliases = new Set(); + + if (previousState.vercelDeploymentId) { + const activeAliases = await this._vercelService.listDeploymentAliases(previousState.vercelDeploymentId); + for (const alias of activeAliases) { + aliases.add(alias.alias); + } + } + + if (aliases.size === 0 && previousState.customDomain) { + aliases.add(previousState.customDomain); + } + + return [...aliases]; + } + + private async switchAliasesWithRollback( + previousDeploymentId: string, + candidateDeploymentId: string, + aliases: string[], + ): Promise { + const switched: string[] = []; + + try { + for (const alias of aliases) { + await this._vercelService.assignAliasToDeployment(candidateDeploymentId, alias); + switched.push(alias); + } + } catch (error) { + for (const alias of switched.reverse()) { + try { + await this._vercelService.assignAliasToDeployment(previousDeploymentId, alias); + } catch (rollbackError) { + console.error('Failed to revert alias after promotion error:', rollbackError); + } + } + throw error; + } + } + private async simulateWork(): Promise { - // In real implementation, this would be actual work - // For testing, we just yield to the event loop - await new Promise(resolve => setTimeout(resolve, 0)); + await new Promise((resolve) => setTimeout(resolve, 0)); } - /** - * Get update history for a deployment - */ async getUpdateHistory(deploymentId: string): Promise 100) { + throw new RangeError('percent must be between 0 and 100'); + } + + this._canaryPercent = percent; + this._status = percent === 0 ? 'pending' : percent === 100 ? 'promoted' : 'in_progress'; + } + + routeRequest(): DeploymentVersion { + this._requestCounter += 1; + const useCanary = (this._requestCounter % 100) < this._canaryPercent; + return useCanary ? this.candidate : this.stable; + } + + simulateTraffic(requestCount: number): Record { + const counts: Record = { + [this.stable.id]: 0, + [this.candidate.id]: 0, + }; + + for (let i = 0; i < requestCount; i += 1) { + const servedBy = this.routeRequest(); + counts[servedBy.id] = (counts[servedBy.id] ?? 0) + 1; + } + + return counts; + } + + evaluateAndMaybeRollback(): boolean { + const shouldRollback = + this.candidate.errorRate >= ROLLBACK_ERROR_RATE_THRESHOLD || + this.candidate.p99LatencyMs > ROLLBACK_LATENCY_THRESHOLD_MS; + + if (shouldRollback) { + this._canaryPercent = 0; + this._status = 'rolled_back'; + } + + return shouldRollback; + } + + promote(): void { + this._canaryPercent = 100; + this._status = 'promoted'; + } +} + +export class BlueGreenSwitcher { + private _active: DeploymentColor; + private _standby: DeploymentColor; + + constructor( + private readonly blue: DeploymentVersion, + private readonly green: DeploymentVersion, + initial: DeploymentColor = 'blue', + ) { + this._active = initial; + this._standby = initial === 'blue' ? 'green' : 'blue'; + } + + get active(): DeploymentColor { + return this._active; + } + + get standby(): DeploymentColor { + return this._standby; + } + + activeVersion(): DeploymentVersion { + return this._active === 'blue' ? this.blue : this.green; + } + + standbyVersion(): DeploymentVersion { + return this._standby === 'blue' ? this.blue : this.green; + } + + switchToStandby(): boolean { + const candidate = this.standbyVersion(); + const healthy = + candidate.errorRate < ROLLBACK_ERROR_RATE_THRESHOLD && + candidate.p99LatencyMs <= ROLLBACK_LATENCY_THRESHOLD_MS; + + if (healthy) { + [this._active, this._standby] = [this._standby, this._active]; + } + + return healthy; + } +} diff --git a/apps/backend/src/services/vercel.service.test.ts b/apps/backend/src/services/vercel.service.test.ts index d48f715..255a003 100644 --- a/apps/backend/src/services/vercel.service.test.ts +++ b/apps/backend/src/services/vercel.service.test.ts @@ -46,6 +46,7 @@ import { VercelApiError, type VercelDeployment, type VercelDeploymentStatus, + type VercelAlias, } from './vercel.service'; // ── fetch mock ──────────────────────────────────────────────────────────────── @@ -342,6 +343,54 @@ describe('VercelService', () => { }); }); + describe('deployment aliases', () => { + it('lists aliases for a deployment', async () => { + mockFetch.mockResolvedValueOnce(makeJsonResponse(200, { + aliases: [ + { uid: 'al_1', alias: 'app.example.com', created: '2026-04-28T12:00:00Z' }, + ], + })); + + const result = await service.listDeploymentAliases('dpl_456'); + + expect(result).toEqual([ + { + uid: 'al_1', + alias: 'app.example.com', + created: '2026-04-28T12:00:00Z', + redirect: null, + }, + ]); + expect(mockFetch).toHaveBeenCalledWith( + 'https://api.vercel.com/v2/deployments/dpl_456/aliases', + expect.objectContaining({ method: 'GET' }), + ); + }); + + it('assigns an alias to a deployment', async () => { + mockFetch.mockResolvedValueOnce(makeJsonResponse(200, { + uid: 'al_1', + alias: 'app.example.com', + created: '2026-04-28T12:00:00Z', + })); + + const result = await service.assignAliasToDeployment('dpl_456', 'app.example.com'); + + expect(result).toEqual({ + uid: 'al_1', + alias: 'app.example.com', + created: '2026-04-28T12:00:00Z', + redirect: null, + }); + + const [, options] = mockFetch.mock.calls[0] as [string, RequestInit]; + expect(JSON.parse(options.body as string)).toEqual({ + alias: 'app.example.com', + redirect: null, + }); + }); + }); + // ── addDomain ────────────────────────────────────────────────────────────── describe('addDomain', () => { @@ -842,22 +891,28 @@ describe('VercelService — addDomain', () => { it('resolves without error on 200', async () => { const { svc, mockFetch } = makeService(); mockFetch.mockResolvedValueOnce(makeResponse(200, {})); - await expect(svc.addDomain('prj_1', 'example.com')).resolves.toBeUndefined(); + await expect(svc.addDomain({ projectId: 'prj_1', domain: 'example.com' })).resolves.toEqual({ + success: true, + domain: 'example.com', + verification: undefined, + }); }); it('throws DOMAIN_EXISTS on 409', async () => { const { svc, mockFetch } = makeService(); mockFetch.mockResolvedValueOnce(makeResponse(409, { error: { message: 'exists' } })); - await expect(svc.addDomain('prj_1', 'example.com')).rejects.toMatchObject({ - code: 'DOMAIN_EXISTS', + await expect(svc.addDomain({ projectId: 'prj_1', domain: 'example.com' })).resolves.toMatchObject({ + success: false, + errorCode: 'DOMAIN_ALREADY_EXISTS', }); }); it('throws AUTH_FAILED on 401', async () => { const { svc, mockFetch } = makeService(); mockFetch.mockResolvedValueOnce(makeResponse(401, { message: 'Unauthorized' })); - await expect(svc.addDomain('prj_1', 'example.com')).rejects.toMatchObject({ - code: 'AUTH_FAILED', + await expect(svc.addDomain({ projectId: 'prj_1', domain: 'example.com' })).resolves.toMatchObject({ + success: false, + errorCode: 'AUTH_FAILED', }); }); @@ -866,17 +921,18 @@ describe('VercelService — addDomain', () => { mockFetch.mockResolvedValueOnce( makeResponse(429, { message: 'Rate limited' }, { 'Retry-After': '10' }), ); - await expect(svc.addDomain('prj_1', 'example.com')).rejects.toMatchObject({ - code: 'RATE_LIMITED', - retryAfterMs: 10_000, + await expect(svc.addDomain({ projectId: 'prj_1', domain: 'example.com' })).resolves.toMatchObject({ + success: false, + errorCode: 'RATE_LIMITED', }); }); it('throws NETWORK_ERROR when fetch throws', async () => { const { svc, mockFetch } = makeService(); mockFetch.mockRejectedValueOnce(new Error('socket hang up')); - await expect(svc.addDomain('prj_1', 'example.com')).rejects.toMatchObject({ - code: 'NETWORK_ERROR', + await expect(svc.addDomain({ projectId: 'prj_1', domain: 'example.com' })).resolves.toMatchObject({ + success: false, + errorCode: 'NETWORK_ERROR', }); }); }); @@ -1048,3 +1104,24 @@ describe('VercelService — getDeploymentLogs', () => { }); }); }); + +const MOCK_TOKEN = 'test_token'; + +function makeResponse( + status: number, + body: unknown, + headers: Record = {}, +) { + return { + ok: status >= 200 && status < 300, + status, + headers: { get: (key: string) => headers[key] ?? null }, + json: async () => body, + }; +} + +function makeService() { + const mockFetch = vi.fn(); + const svc = new VercelService(mockFetch as any); + return { svc, mockFetch }; +} diff --git a/apps/backend/src/services/vercel.service.ts b/apps/backend/src/services/vercel.service.ts index 808e63c..22c319a 100644 --- a/apps/backend/src/services/vercel.service.ts +++ b/apps/backend/src/services/vercel.service.ts @@ -36,6 +36,8 @@ export type VercelErrorCode = | 'NETWORK_ERROR' | 'PROJECT_EXISTS' | 'DOMAIN_EXISTS' + | 'DOMAIN_ALREADY_EXISTS' + | 'DOMAIN_NOT_FOUND' | 'UNKNOWN'; // ── Domain / certificate types ──────────────────────────────────────────────── @@ -99,6 +101,13 @@ export interface TriggerDeploymentResult { status: string; } +export interface VercelAlias { + uid: string; + alias: string; + created?: string; + redirect?: string | null; +} + // ── Deployment status types (Issue #92) ───────────────────────────────────── export type VercelDeploymentStatus = @@ -390,6 +399,40 @@ export class VercelService { }; } + async listDeploymentAliases(deploymentId: string): Promise { + const data = await this.request<{ aliases?: Array> }>( + `/v2/deployments/${deploymentId}/aliases`, + { method: 'GET' }, + ); + + return (data.aliases ?? []).map((alias) => ({ + uid: alias.uid as string, + alias: alias.alias as string, + created: alias.created as string | undefined, + redirect: (alias.redirect as string | null | undefined) ?? null, + })); + } + + async assignAliasToDeployment(deploymentId: string, alias: string): Promise { + const data = await this.request>( + `/v2/deployments/${deploymentId}/aliases`, + { + method: 'POST', + body: JSON.stringify({ + alias, + redirect: null, + }), + }, + ); + + return { + uid: data.uid as string, + alias: data.alias as string, + created: data.created as string | undefined, + redirect: (data.redirect as string | null | undefined) ?? null, + }; + } + /** * Verify that the configured token can reach the Vercel API. @@ -447,7 +490,10 @@ export class VercelService { } catch (err: unknown) { const vercelErr = err as VercelApiError; // 404 means Vercel hasn't issued a cert yet — treat as pending - if (vercelErr.code === 'UNKNOWN' && vercelErr.message.includes('404')) { + if ( + vercelErr.code === 'UNKNOWN' && + (vercelErr.message.includes('404') || vercelErr.message.toLowerCase().includes('not found')) + ) { return { domain, state: 'pending' }; } throw err; @@ -714,7 +760,10 @@ export class VercelService { method: 'DELETE', }); } catch (error: unknown) { - if (error instanceof VercelApiError && error.code === 'DOMAIN_NOT_FOUND') { + if ( + error instanceof VercelApiError && + (error.code === 'DOMAIN_NOT_FOUND' || error.message.toLowerCase().includes('not found')) + ) { // Domain doesn't exist, which is fine for cleanup return; } @@ -744,7 +793,10 @@ export class VercelService { deploymentId: data.deploymentId as string | undefined, }; } catch (error: unknown) { - if (error instanceof VercelApiError && error.code === 'DOMAIN_NOT_FOUND') { + if ( + error instanceof VercelApiError && + (error.code === 'DOMAIN_NOT_FOUND' || error.message.toLowerCase().includes('not found')) + ) { return null; } throw error; diff --git a/apps/backend/tests/deployment/rollout-strategy.test.ts b/apps/backend/tests/deployment/rollout-strategy.test.ts index 73d1575..fd720dd 100644 --- a/apps/backend/tests/deployment/rollout-strategy.test.ts +++ b/apps/backend/tests/deployment/rollout-strategy.test.ts @@ -1,377 +1,148 @@ /** * Deployment Rollout Strategy Tests * - * Verifies canary, blue-green, and percentage-based rollout strategies, - * traffic splitting, and automatic rollback on errors. - * - * No live infrastructure is required — all routing and health checks are - * simulated in-memory. - * - * Rollout best practices documented here: - * - Canary: start at ≤10 % traffic; promote only when error rate < 1 % - * - Blue-green: keep old environment warm until new one is fully healthy - * - Rollback trigger: error rate ≥ 5 % OR p99 latency > 2 000 ms - * - Traffic split increments: 5 % → 25 % → 50 % → 100 % + * Verifies the production rollout strategy primitives that back live deployment + * updates. These tests intentionally exercise the shared service classes so the + * update pipeline and rollout strategy stay aligned. */ -import { describe, it, expect, beforeEach } from 'vitest'; - -// ── Types ───────────────────────────────────────────────────────────────────── - -type DeploymentColor = 'blue' | 'green'; -type RolloutStatus = 'pending' | 'in_progress' | 'promoted' | 'rolled_back'; - -interface DeploymentVersion { - id: string; - errorRate: number; // 0–1 - p99LatencyMs: number; -} - -interface TrafficRequest { - id: string; -} - -interface TrafficResult { - requestId: string; - servedBy: string; // deployment version id -} - -// ── Rollout engine ──────────────────────────────────────────────────────────── - -const ROLLBACK_ERROR_RATE_THRESHOLD = 0.05; -const ROLLBACK_LATENCY_THRESHOLD_MS = 2_000; - -class RolloutEngine { - private _canaryPercent = 0; - private _status: RolloutStatus = 'pending'; - private _requestCounter = 0; - - constructor( - private readonly stable: DeploymentVersion, - private readonly candidate: DeploymentVersion, - ) {} - - get status(): RolloutStatus { return this._status; } - get canaryPercent(): number { return this._canaryPercent; } - - /** Set the percentage of traffic routed to the candidate. */ - setTrafficPercent(pct: number): void { - if (pct < 0 || pct > 100) throw new RangeError('pct must be 0–100'); - this._canaryPercent = pct; - this._status = pct === 0 ? 'pending' : pct === 100 ? 'promoted' : 'in_progress'; - } - - /** Route a single request; returns which version served it. */ - route(req: TrafficRequest): TrafficResult { - this._requestCounter++; - const useCanary = (this._requestCounter % 100) < this._canaryPercent; - const version = useCanary ? this.candidate : this.stable; - return { requestId: req.id, servedBy: version.id }; - } - - /** Simulate N requests and return counts per version. */ - simulateTraffic(n: number): Record { - const counts: Record = { [this.stable.id]: 0, [this.candidate.id]: 0 }; - for (let i = 0; i < n; i++) { - const { servedBy } = this.route({ id: `req-${i}` }); - counts[servedBy] = (counts[servedBy] ?? 0) + 1; - } - return counts; - } - - /** - * Evaluate candidate health and auto-rollback if thresholds are breached. - * Returns true if rollback was triggered. - */ - evaluateAndMaybeRollback(): boolean { - const shouldRollback = - this.candidate.errorRate >= ROLLBACK_ERROR_RATE_THRESHOLD || - this.candidate.p99LatencyMs > ROLLBACK_LATENCY_THRESHOLD_MS; - - if (shouldRollback) { - this._canaryPercent = 0; - this._status = 'rolled_back'; - } - return shouldRollback; - } - - promote(): void { - this._canaryPercent = 100; - this._status = 'promoted'; - } -} - -// ── Blue-green switcher ─────────────────────────────────────────────────────── - -class BlueGreenSwitcher { - private _active: DeploymentColor; - private _standby: DeploymentColor; - - constructor( - private readonly blue: DeploymentVersion, - private readonly green: DeploymentVersion, - initial: DeploymentColor = 'blue', - ) { - this._active = initial; - this._standby = initial === 'blue' ? 'green' : 'blue'; - } - - get active(): DeploymentColor { return this._active; } - get standby(): DeploymentColor { return this._standby; } - - activeVersion(): DeploymentVersion { - return this._active === 'blue' ? this.blue : this.green; - } - - standbyVersion(): DeploymentVersion { - return this._standby === 'blue' ? this.blue : this.green; - } - - /** Switch traffic to standby if it is healthy; returns success. */ - switchToStandby(): boolean { - const candidate = this.standbyVersion(); - const healthy = - candidate.errorRate < ROLLBACK_ERROR_RATE_THRESHOLD && - candidate.p99LatencyMs <= ROLLBACK_LATENCY_THRESHOLD_MS; - - if (healthy) { - [this._active, this._standby] = [this._standby, this._active]; - } - return healthy; - } - - route(req: TrafficRequest): TrafficResult { - return { requestId: req.id, servedBy: this.activeVersion().id }; - } -} - -// ── Helpers ─────────────────────────────────────────────────────────────────── +import { beforeEach, describe, expect, it } from 'vitest'; +import { + BlueGreenSwitcher, + RolloutEngine, + type DeploymentVersion, +} from '@/services/rollout-strategy.service'; function makeVersion(id: string, errorRate = 0.001, p99LatencyMs = 120): DeploymentVersion { - return { id, errorRate, p99LatencyMs }; + return { id, errorRate, p99LatencyMs }; } -// ── Tests ───────────────────────────────────────────────────────────────────── - -describe('Canary rollout — traffic percentage controls', () => { - let engine: RolloutEngine; - - beforeEach(() => { - engine = new RolloutEngine(makeVersion('stable-v1'), makeVersion('canary-v2')); - }); - - it('starts with 0 % canary traffic (pending status)', () => { - expect(engine.canaryPercent).toBe(0); - expect(engine.status).toBe('pending'); - }); - - it('routes 0 % to canary when percent is 0', () => { - const counts = engine.simulateTraffic(100); - expect(counts['canary-v2']).toBe(0); - expect(counts['stable-v1']).toBe(100); - }); - - it('routes ~10 % to canary at 10 % setting', () => { - engine.setTrafficPercent(10); - const counts = engine.simulateTraffic(1_000); - // Allow ±2 % tolerance for the modulo-based router - expect(counts['canary-v2']).toBeGreaterThanOrEqual(80); - expect(counts['canary-v2']).toBeLessThanOrEqual(120); - expect(engine.status).toBe('in_progress'); - }); - - it('routes ~50 % to canary at 50 % setting', () => { - engine.setTrafficPercent(50); - const counts = engine.simulateTraffic(1_000); - expect(counts['canary-v2']).toBeGreaterThanOrEqual(480); - expect(counts['canary-v2']).toBeLessThanOrEqual(520); - }); - - it('routes 100 % to canary after promotion', () => { - engine.promote(); - const counts = engine.simulateTraffic(100); - expect(counts['canary-v2']).toBe(100); - expect(engine.status).toBe('promoted'); - }); - - it('rejects out-of-range traffic percentages', () => { - expect(() => engine.setTrafficPercent(-1)).toThrow(RangeError); - expect(() => engine.setTrafficPercent(101)).toThrow(RangeError); - }); -}); - -describe('Canary rollout — automatic rollback', () => { - it('rolls back when candidate error rate exceeds threshold', () => { - const engine = new RolloutEngine( - makeVersion('stable-v1'), - makeVersion('canary-v2', 0.08), // 8 % error rate — above 5 % threshold - ); - engine.setTrafficPercent(10); - - const didRollback = engine.evaluateAndMaybeRollback(); - - expect(didRollback).toBe(true); - expect(engine.status).toBe('rolled_back'); - expect(engine.canaryPercent).toBe(0); - }); - - it('rolls back when candidate p99 latency exceeds threshold', () => { - const engine = new RolloutEngine( - makeVersion('stable-v1'), - makeVersion('canary-v2', 0.001, 2_500), // 2 500 ms — above 2 000 ms threshold - ); - engine.setTrafficPercent(10); - - expect(engine.evaluateAndMaybeRollback()).toBe(true); - expect(engine.status).toBe('rolled_back'); - }); - - it('does NOT roll back when candidate is healthy', () => { - const engine = new RolloutEngine( - makeVersion('stable-v1'), - makeVersion('canary-v2', 0.002, 150), // healthy - ); - engine.setTrafficPercent(25); - - expect(engine.evaluateAndMaybeRollback()).toBe(false); - expect(engine.status).toBe('in_progress'); - expect(engine.canaryPercent).toBe(25); - }); - - it('routes all traffic back to stable after rollback', () => { - const engine = new RolloutEngine( - makeVersion('stable-v1'), - makeVersion('canary-v2', 0.1), - ); - engine.setTrafficPercent(20); - engine.evaluateAndMaybeRollback(); - - const counts = engine.simulateTraffic(100); - expect(counts['canary-v2']).toBe(0); - expect(counts['stable-v1']).toBe(100); - }); +describe('Canary rollout - traffic percentage controls', () => { + let engine: RolloutEngine; + + beforeEach(() => { + engine = new RolloutEngine(makeVersion('stable-v1'), makeVersion('canary-v2')); + }); + + it('starts with 0 % canary traffic (pending status)', () => { + expect(engine.canaryPercent).toBe(0); + expect(engine.status).toBe('pending'); + }); + + it('routes 0 % to canary when percent is 0', () => { + const counts = engine.simulateTraffic(100); + expect(counts['canary-v2']).toBe(0); + expect(counts['stable-v1']).toBe(100); + }); + + it('routes about 10 % to canary at 10 % setting', () => { + engine.setTrafficPercent(10); + const counts = engine.simulateTraffic(1_000); + expect(counts['canary-v2']).toBeGreaterThanOrEqual(80); + expect(counts['canary-v2']).toBeLessThanOrEqual(120); + expect(engine.status).toBe('in_progress'); + }); + + it('routes about 50 % to canary at 50 % setting', () => { + engine.setTrafficPercent(50); + const counts = engine.simulateTraffic(1_000); + expect(counts['canary-v2']).toBeGreaterThanOrEqual(480); + expect(counts['canary-v2']).toBeLessThanOrEqual(520); + }); + + it('routes 100 % to canary after promotion', () => { + engine.promote(); + const counts = engine.simulateTraffic(100); + expect(counts['canary-v2']).toBe(100); + expect(engine.status).toBe('promoted'); + }); + + it('rejects out-of-range traffic percentages', () => { + expect(() => engine.setTrafficPercent(-1)).toThrow(RangeError); + expect(() => engine.setTrafficPercent(101)).toThrow(RangeError); + }); }); -describe('Canary rollout — incremental rollout steps', () => { - it('progresses through 5 → 25 → 50 → 100 % without rollback on healthy candidate', () => { - const engine = new RolloutEngine( - makeVersion('stable-v1'), - makeVersion('canary-v2', 0.001, 100), - ); - - for (const pct of [5, 25, 50, 100]) { - engine.setTrafficPercent(pct); - const rolledBack = engine.evaluateAndMaybeRollback(); - expect(rolledBack).toBe(false); - expect(engine.canaryPercent).toBe(pct); - } - - expect(engine.status).toBe('promoted'); - }); +describe('Canary rollout - automatic rollback', () => { + it('rolls back when candidate error rate exceeds threshold', () => { + const engine = new RolloutEngine( + makeVersion('stable-v1'), + makeVersion('canary-v2', 0.08), + ); + engine.setTrafficPercent(10); + + expect(engine.evaluateAndMaybeRollback()).toBe(true); + expect(engine.status).toBe('rolled_back'); + expect(engine.canaryPercent).toBe(0); + }); + + it('rolls back when candidate p99 latency exceeds threshold', () => { + const engine = new RolloutEngine( + makeVersion('stable-v1'), + makeVersion('canary-v2', 0.001, 2_500), + ); + engine.setTrafficPercent(10); + + expect(engine.evaluateAndMaybeRollback()).toBe(true); + expect(engine.status).toBe('rolled_back'); + }); + + it('does not roll back when candidate is healthy', () => { + const engine = new RolloutEngine( + makeVersion('stable-v1'), + makeVersion('canary-v2', 0.002, 150), + ); + engine.setTrafficPercent(25); + + expect(engine.evaluateAndMaybeRollback()).toBe(false); + expect(engine.status).toBe('in_progress'); + expect(engine.canaryPercent).toBe(25); + }); }); -describe('Blue-green deployment — switching', () => { - it('starts serving traffic from the initial active environment', () => { - const switcher = new BlueGreenSwitcher( - makeVersion('blue-v1'), - makeVersion('green-v2'), - 'blue', - ); - const result = switcher.route({ id: 'r1' }); - expect(result.servedBy).toBe('blue-v1'); - expect(switcher.active).toBe('blue'); - }); - - it('switches to green when green is healthy', () => { - const switcher = new BlueGreenSwitcher( - makeVersion('blue-v1'), - makeVersion('green-v2', 0.001, 100), - 'blue', - ); - - const switched = switcher.switchToStandby(); - - expect(switched).toBe(true); - expect(switcher.active).toBe('green'); - expect(switcher.standby).toBe('blue'); - }); - - it('routes all traffic to new active after switch', () => { - const switcher = new BlueGreenSwitcher( - makeVersion('blue-v1'), - makeVersion('green-v2'), - 'blue', - ); - switcher.switchToStandby(); - - for (let i = 0; i < 10; i++) { - expect(switcher.route({ id: `r${i}` }).servedBy).toBe('green-v2'); - } - }); - - it('refuses to switch when standby has high error rate', () => { - const switcher = new BlueGreenSwitcher( - makeVersion('blue-v1'), - makeVersion('green-v2', 0.1), // unhealthy - 'blue', - ); - - const switched = switcher.switchToStandby(); - - expect(switched).toBe(false); - expect(switcher.active).toBe('blue'); // unchanged - }); - - it('refuses to switch when standby has high latency', () => { - const switcher = new BlueGreenSwitcher( - makeVersion('blue-v1'), - makeVersion('green-v2', 0.001, 3_000), // unhealthy latency - 'blue', - ); - - expect(switcher.switchToStandby()).toBe(false); - expect(switcher.active).toBe('blue'); - }); - - it('can switch back to blue (rollback) if green becomes unhealthy', () => { - const green = makeVersion('green-v2', 0.001, 100); - const switcher = new BlueGreenSwitcher(makeVersion('blue-v1'), green, 'blue'); - - switcher.switchToStandby(); // blue → green - expect(switcher.active).toBe('green'); - - // Simulate green degrading — mutate in place - green.errorRate = 0.2; - - // Standby is now blue (healthy); switch back - const rolledBack = switcher.switchToStandby(); - expect(rolledBack).toBe(true); - expect(switcher.active).toBe('blue'); - }); -}); - -describe('Traffic splitting — realistic patterns', () => { - it('splits traffic proportionally across 1 000 requests at each step', () => { - const steps = [10, 25, 50, 75]; - for (const pct of steps) { - const engine = new RolloutEngine( - makeVersion('stable'), - makeVersion('candidate'), - ); - engine.setTrafficPercent(pct); - const counts = engine.simulateTraffic(1_000); - const actualPct = (counts['candidate'] / 1_000) * 100; - // Allow ±3 % tolerance - expect(actualPct).toBeGreaterThanOrEqual(pct - 3); - expect(actualPct).toBeLessThanOrEqual(pct + 3); - } - }); - - it('total requests always equals the number sent', () => { - const engine = new RolloutEngine(makeVersion('s'), makeVersion('c')); - engine.setTrafficPercent(33); - const counts = engine.simulateTraffic(500); - const total = Object.values(counts).reduce((a, b) => a + b, 0); - expect(total).toBe(500); - }); +describe('Blue-green deployment - switching', () => { + it('starts serving traffic from the initial active environment', () => { + const switcher = new BlueGreenSwitcher( + makeVersion('blue-v1'), + makeVersion('green-v2'), + 'blue', + ); + + expect(switcher.activeVersion().id).toBe('blue-v1'); + expect(switcher.active).toBe('blue'); + }); + + it('switches to green when green is healthy', () => { + const switcher = new BlueGreenSwitcher( + makeVersion('blue-v1'), + makeVersion('green-v2', 0.001, 100), + 'blue', + ); + + expect(switcher.switchToStandby()).toBe(true); + expect(switcher.active).toBe('green'); + expect(switcher.standby).toBe('blue'); + }); + + it('refuses to switch when standby is unhealthy', () => { + const switcher = new BlueGreenSwitcher( + makeVersion('blue-v1'), + makeVersion('green-v2', 0.1, 3_000), + 'blue', + ); + + expect(switcher.switchToStandby()).toBe(false); + expect(switcher.active).toBe('blue'); + }); + + it('can switch back to blue after green degrades', () => { + const green = makeVersion('green-v2', 0.001, 100); + const switcher = new BlueGreenSwitcher(makeVersion('blue-v1'), green, 'blue'); + + switcher.switchToStandby(); + green.errorRate = 0.2; + + expect(switcher.switchToStandby()).toBe(true); + expect(switcher.active).toBe('blue'); + }); }); diff --git a/docs/README.md b/docs/README.md index f76d864..2794d18 100644 --- a/docs/README.md +++ b/docs/README.md @@ -289,6 +289,35 @@ Error model: - `NETWORK_ERROR` for transport failures to GitHub - `API_ERROR` for non-auth GitHub API failures +### Blue-Green Deployment Updates + +Live deployment updates now use a staged rollout instead of a hard cutover when the deployment already has a Vercel project binding. + +Behavior: +- The update pipeline deploys the new revision to the standby environment first. +- Canary rollout state is tracked on the `deployment_updates.canary_percent` field for observability. +- Promotion reassigns the active Vercel aliases only after the rollout checks pass. +- If candidate health degrades or an operator requests rollback during rollout, traffic stays on the previous active deployment. +- If a Vercel alias reassignment fails mid-switch, the service automatically reassigns any moved aliases back to the previous deployment before surfacing the error. + +Sequence diagram: + +```text +deployment update + -> standby deploy + -> canary traffic (5% -> 25% -> 50%) + -> monitor candidate health + -> promote active aliases + -> 100% traffic on candidate + +deployment update + -> standby deploy + -> canary traffic + -> monitor detects failure or manual rollback + -> keep active aliases on stable deployment + -> mark update rolled_back +``` + ### Payment Endpoints ``` diff --git a/supabase/migrations/009_deployment_updates_rollout.sql b/supabase/migrations/009_deployment_updates_rollout.sql new file mode 100644 index 0000000..4a44aef --- /dev/null +++ b/supabase/migrations/009_deployment_updates_rollout.sql @@ -0,0 +1,72 @@ +CREATE TABLE IF NOT EXISTS deployment_updates ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + deployment_id UUID NOT NULL REFERENCES deployments(id) ON DELETE CASCADE, + user_id UUID NOT NULL REFERENCES profiles(id) ON DELETE CASCADE, + new_customization_config JSONB NOT NULL, + previous_state JSONB, + status TEXT NOT NULL DEFAULT 'pending' CHECK ( + status IN ( + 'pending', + 'validating', + 'generating', + 'updating_repo', + 'redeploying', + 'completed', + 'rolled_back', + 'failed' + ) + ), + canary_percent INTEGER NOT NULL DEFAULT 0 CHECK (canary_percent BETWEEN 0 AND 100), + error_message TEXT, + created_at TIMESTAMPTZ DEFAULT NOW() NOT NULL, + updated_at TIMESTAMPTZ DEFAULT NOW() NOT NULL, + completed_at TIMESTAMPTZ +); + +ALTER TABLE deployment_updates + ADD COLUMN IF NOT EXISTS canary_percent INTEGER NOT NULL DEFAULT 0; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_constraint + WHERE conname = 'deployment_updates_canary_percent_check' + ) THEN + ALTER TABLE deployment_updates + ADD CONSTRAINT deployment_updates_canary_percent_check + CHECK (canary_percent BETWEEN 0 AND 100); + END IF; +END $$; + +CREATE INDEX IF NOT EXISTS idx_deployment_updates_deployment_id + ON deployment_updates(deployment_id); + +CREATE INDEX IF NOT EXISTS idx_deployment_updates_status + ON deployment_updates(status); + +DROP TRIGGER IF EXISTS update_deployment_updates_updated_at ON deployment_updates; +CREATE TRIGGER update_deployment_updates_updated_at + BEFORE UPDATE ON deployment_updates + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + +ALTER TABLE deployment_updates ENABLE ROW LEVEL SECURITY; + +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM pg_policies + WHERE schemaname = 'public' + AND tablename = 'deployment_updates' + AND policyname = 'Users can manage their own deployment updates' + ) THEN + CREATE POLICY "Users can manage their own deployment updates" + ON deployment_updates + FOR ALL + TO authenticated + USING (auth.uid() = user_id) + WITH CHECK (auth.uid() = user_id); + END IF; +END $$;