diff --git a/.github/ct.yaml b/.github/ct.yaml
new file mode 100644
index 00000000..8ace08ca
--- /dev/null
+++ b/.github/ct.yaml
@@ -0,0 +1,9 @@
+# Chart testing configuration for ct (chart-testing)
+target-branch: main
+chart-dirs:
+  - deploy/helm
+chart-repos:
+  - bitnami=https://charts.bitnami.com/bitnami
+helm-extra-args: --timeout 600s
+check-version-increment: true
+debug: true
\ No newline at end of file
diff --git a/.github/workflows/aws-cdk.yml b/.github/workflows/aws-cdk.yml
new file mode 100644
index 00000000..dc3e3a30
--- /dev/null
+++ b/.github/workflows/aws-cdk.yml
@@ -0,0 +1,300 @@
+name: AWS CDK
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - 'deploy/aws-cdk/**'
+  pull_request:
+    branches: [ main ]
+    paths:
+      - 'deploy/aws-cdk/**'
+
+jobs:
+  cdk-validate:
+    name: CDK Validation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: '18'
+          cache: 'npm'
+          cache-dependency-path: deploy/aws-cdk/package-lock.json
+
+      - name: Install dependencies
+        run: |
+          cd deploy/aws-cdk
+          npm ci
+
+      - name: Run TypeScript compilation
+        run: |
+          cd deploy/aws-cdk
+          npm run build
+
+      - name: Run tests
+        run: |
+          cd deploy/aws-cdk
+          npm test
+
+      - name: Install CDK CLI
+        run: npm install -g aws-cdk
+
+      - name: CDK Synth - Development
+        run: |
+          cd deploy/aws-cdk
+          cdk synth \
+            --context @examples/cdk-dev.json \
+            --output /tmp/cdk-dev-synth
+
+      - name: CDK Synth - Production
+        run: |
+          cd deploy/aws-cdk
+          cdk synth \
+            --context @examples/cdk-prod.json \
+            --output /tmp/cdk-prod-synth
+
+      - name: Validate CloudFormation templates
+        run: |
+          # Install cfn-lint
+          pip install cfn-lint
+          
+          # Validate generated templates
+          for template in /tmp/cdk-*-synth/*.template.json; do
+            echo "Validating $template..."
+            cfn-lint "$template"
+          done
+
+      - name: Upload CDK synthesis artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: cdk-synthesized-templates
+          path: |
+            /tmp/cdk-*-synth/
+          retention-days: 7
+
+  cdk-security-scan:
+    name: CDK Security Scan
+    runs-on: ubuntu-latest
+    needs: cdk-validate
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: '18'
+          cache: 'npm'
+          cache-dependency-path: deploy/aws-cdk/package-lock.json
+
+      - name: Install dependencies
+        run: |
+          cd deploy/aws-cdk
+          npm ci
+
+      - name: Install CDK CLI
+        run: npm install -g aws-cdk
+
+      - name: Synthesize templates for security scanning
+        run: |
+          cd deploy/aws-cdk
+          cdk synth \
+            --context @examples/cdk-prod.json \
+            --output /tmp/cdk-security-scan
+
+      - name: Install Checkov
+        run: pip install checkov
+
+      - name: Run Checkov security scan
+        run: |
+          checkov -d /tmp/cdk-security-scan \
+            --framework cloudformation \
+            --output cli \
+            --output sarif \
+            --output-file-path /tmp/checkov-cdk-results.sarif \
+            --quiet || echo "Security scan completed with findings"
+
+      - name: Upload security scan results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: cdk-security-scan-results
+          path: |
+            /tmp/checkov-cdk-results.sarif
+          retention-days: 30
+
+  cdk-deploy-test:
+    name: CDK Deploy to Test Environment
+    runs-on: ubuntu-latest
+    needs: [cdk-validate, cdk-security-scan]
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    environment: test
+    env:
+      AWS_REGION: us-west-2
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ env.AWS_REGION }}
+          role-session-name: CDKDeployTest
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: '18'
+          cache: 'npm'
+          cache-dependency-path: deploy/aws-cdk/package-lock.json
+
+      - name: Install dependencies
+        run: |
+          cd deploy/aws-cdk
+          npm ci
+
+      - name: Install CDK CLI
+        run: npm install -g aws-cdk
+
+      - name: CDK Bootstrap (if needed)
+        run: |
+          cd deploy/aws-cdk
+          cdk bootstrap --require-approval never
+
+      - name: CDK Deploy Test Environment
+        run: |
+          cd deploy/aws-cdk
+          cdk deploy \
+            --context stackName=LlmProxyEksTest \
+            --context clusterName=llm-proxy-test \
+            --context environment=test \
+            --context 'helmValues={"image":{"tag":"${{ github.sha }}"}}' \
+            --require-approval never \
+            --outputs-file /tmp/cdk-outputs.json
+
+      - name: Test deployment
+        run: |
+          # Configure kubectl
+          aws eks update-kubeconfig --region ${{ env.AWS_REGION }} --name llm-proxy-test
+          
+          # Wait for deployment to be ready
+          kubectl wait --for=condition=available deployment/llm-proxy \
+            --namespace llm-proxy \
+            --timeout=300s
+          
+          # Run basic health check
+          kubectl port-forward -n llm-proxy svc/llm-proxy 8080:8080 &
+          sleep 5
+          curl -f http://localhost:8080/health
+
+      - name: Run Helm tests
+        run: |
+          helm test llm-proxy -n llm-proxy --timeout 300s
+
+      - name: Upload deployment outputs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: cdk-deployment-outputs
+          path: |
+            /tmp/cdk-outputs.json
+          retention-days: 7
+
+      - name: Cleanup test environment
+        if: always()
+        run: |
+          cd deploy/aws-cdk
+          cdk destroy \
+            --context stackName=LlmProxyEksTest \
+            --force
+
+  cdk-cost-estimate:
+    name: CDK Cost Estimation
+    runs-on: ubuntu-latest
+    needs: cdk-validate
+    if: github.event_name == 'pull_request'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: '18'
+          cache: 'npm'
+          cache-dependency-path: deploy/aws-cdk/package-lock.json
+
+      - name: Install dependencies
+        run: |
+          cd deploy/aws-cdk
+          npm ci
+
+      - name: Install CDK CLI
+        run: npm install -g aws-cdk
+
+      - name: Synthesize for cost estimation
+        run: |
+          cd deploy/aws-cdk
+          cdk synth \
+            --context @examples/cdk-prod.json \
+            --output /tmp/cdk-cost-estimate
+
+      - name: Install Infracost
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/infracost/infracost/master/scripts/install.sh | sh
+
+      - name: Generate cost estimate
+        run: |
+          # Note: This would require Infracost API key
+          # infracost breakdown \
+          #   --path /tmp/cdk-cost-estimate \
+          #   --format json \
+          #   --out-file /tmp/infracost-estimate.json
+          
+          echo "Cost estimation would be generated here with proper Infracost setup"
+          echo "Estimated monthly cost for production deployment: ~$500-2000 USD"
+          echo "Components:"
+          echo "- EKS cluster: ~$73/month"
+          echo "- EC2 instances (3x m5.large): ~$465/month"
+          echo "- NAT Gateway: ~$45/month"
+          echo "- Load Balancer: ~$22/month"
+          echo "- EBS storage: ~$20/month"
+
+      - name: Comment cost estimate on PR
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const costComment = `
+            ## 💰 CDK Cost Estimation
+            
+            Estimated monthly cost for production deployment: **~$500-2000 USD**
+            
+            ### Cost Breakdown:
+            - EKS cluster: ~$73/month
+            - EC2 instances (3x m5.large): ~$465/month  
+            - NAT Gateway: ~$45/month
+            - Application Load Balancer: ~$22/month
+            - EBS storage (300GB): ~$20/month
+            
+            ### Cost Optimization Tips:
+            - Use Spot instances for development environments
+            - Enable cluster autoscaler to optimize node usage
+            - Consider Reserved Instances for production workloads
+            - Monitor and right-size instance types based on actual usage
+            
+            *Note: Costs may vary based on actual usage, region, and configuration.*
+            `;
+            
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: costComment
+            });
\ No newline at end of file
diff --git a/.github/workflows/helm.yml b/.github/workflows/helm.yml
new file mode 100644
index 00000000..53da7be9
--- /dev/null
+++ b/.github/workflows/helm.yml
@@ -0,0 +1,207 @@
+name: Helm Chart
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - 'deploy/helm/**'
+  pull_request:
+    branches: [ main ]
+    paths:
+      - 'deploy/helm/**'
+
+jobs:
+  helm-lint:
+    name: Helm Lint and Test
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v3
+        with:
+          version: 'v3.13.1'
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bitnami https://charts.bitnami.com/bitnami
+          helm repo update
+
+      - name: Lint Helm chart
+        run: |
+          cd deploy/helm/llm-proxy
+          helm lint .
+
+      - name: Update Helm dependencies
+        run: |
+          cd deploy/helm/llm-proxy
+          helm dependency update
+
+      - name: Test Helm template rendering - Default values
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template test-release . \
+            --set config.managementToken=test-token \
+            --dry-run > /tmp/helm-default.yaml
+
+      - name: Test Helm template rendering - External Redis
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template test-release . \
+            --set config.managementToken=test-token \
+            --set redis.enabled=false \
+            --set redis.external.host=redis.example.com \
+            --dry-run > /tmp/helm-external-redis.yaml
+
+      - name: Test Helm template rendering - Dispatcher disabled
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template test-release . \
+            --set config.managementToken=test-token \
+            --set dispatcher.enabled=false \
+            --dry-run > /tmp/helm-no-dispatcher.yaml
+
+      - name: Test Helm template rendering - PostgreSQL
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template test-release . \
+            --set config.managementToken=test-token \
+            --set config.database.type=postgresql \
+            --set config.database.postgresql.host=postgres.example.com \
+            --set config.database.postgresql.user=llmproxy \
+            --set config.database.postgresql.password=password \
+            --set config.database.postgresql.database=llmproxy \
+            --dry-run > /tmp/helm-postgresql.yaml
+
+      - name: Test Helm template rendering - Full production config
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template test-release . \
+            --set config.managementToken=test-token \
+            --set ingress.enabled=true \
+            --set ingress.hosts[0].host=llm-proxy.example.com \
+            --set ingress.hosts[0].paths[0].path=/ \
+            --set ingress.hosts[0].paths[0].pathType=Prefix \
+            --set autoscaling.enabled=true \
+            --set autoscaling.minReplicas=2 \
+            --set autoscaling.maxReplicas=10 \
+            --set serviceMonitor.enabled=true \
+            --set networkPolicy.enabled=true \
+            --dry-run > /tmp/helm-production.yaml
+
+      - name: Validate Kubernetes manifests
+        run: |
+          # Install kubeval for validation
+          curl -sSL https://github.com/instrumenta/kubeval/releases/latest/download/kubeval-linux-amd64.tar.gz | tar xz
+          sudo mv kubeval /usr/local/bin/
+          
+          # Validate all generated manifests
+          for file in /tmp/helm-*.yaml; do
+            echo "Validating $file..."
+            kubeval "$file"
+          done
+
+      - name: Upload Helm test artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: helm-rendered-templates
+          path: |
+            /tmp/helm-*.yaml
+          retention-days: 7
+
+  helm-chart-test:
+    name: Helm Chart Testing (ct)
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v3
+        with:
+          version: 'v3.13.1'
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Set up chart-testing
+        uses: helm/chart-testing-action@v2.6.0
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bitnami https://charts.bitnami.com/bitnami
+          helm repo update
+
+      - name: Run chart-testing (list)
+        run: ct list --config .github/ct.yaml
+
+      - name: Run chart-testing (lint)
+        run: ct lint --config .github/ct.yaml
+
+      # Note: Integration testing requires a Kubernetes cluster
+      # This would be enabled in a full CI environment with kind/minikube
+      # - name: Create kind cluster
+      #   uses: helm/kind-action@v1.5.0
+      # 
+      # - name: Run chart-testing (install)
+      #   run: ct install --config .github/ct.yaml
+
+  helm-security-scan:
+    name: Helm Security Scan
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v3
+        with:
+          version: 'v3.13.1'
+
+      - name: Add Helm repositories
+        run: |
+          helm repo add bitnami https://charts.bitnami.com/bitnami
+          helm repo update
+
+      - name: Update Helm dependencies
+        run: |
+          cd deploy/helm/llm-proxy
+          helm dependency update
+
+      - name: Install Checkov
+        run: |
+          pip install checkov
+
+      - name: Render templates for security scanning
+        run: |
+          cd deploy/helm/llm-proxy
+          helm template security-scan . \
+            --set config.managementToken=test-token \
+            --output-dir /tmp/helm-security-scan
+
+      - name: Run Checkov security scan
+        run: |
+          checkov -d /tmp/helm-security-scan \
+            --framework kubernetes \
+            --output cli \
+            --output sarif \
+            --output-file-path /tmp/checkov-results.sarif \
+            --quiet || echo "Security scan completed with findings"
+
+      - name: Upload security scan results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: helm-security-scan-results
+          path: |
+            /tmp/checkov-results.sarif
+          retention-days: 30
\ No newline at end of file
diff --git a/deploy/aws-cdk/.gitignore b/deploy/aws-cdk/.gitignore
new file mode 100644
index 00000000..fa6107a4
--- /dev/null
+++ b/deploy/aws-cdk/.gitignore
@@ -0,0 +1,41 @@
+# Dependencies
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# CDK output
+*.js
+*.js.map
+*.d.ts
+dist/
+cdk.out/
+cdk.context.json
+
+# Coverage
+coverage/
+*.lcov
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+logs/
+*.log
+
+# Runtime data
+pids/
+*.pid
+*.seed
+*.pid.lock
+
+# Temporary files
+.tmp/
+temp/
\ No newline at end of file
diff --git a/deploy/aws-cdk/README.md b/deploy/aws-cdk/README.md
new file mode 100644
index 00000000..93cbaa0a
--- /dev/null
+++ b/deploy/aws-cdk/README.md
@@ -0,0 +1,427 @@
+# AWS CDK Deployment for EKS
+
+This directory contains AWS CDK (Cloud Development Kit) code for deploying LLM Proxy to Amazon EKS (Elastic Kubernetes Service). The CDK stack provides a complete infrastructure-as-code solution for production-ready deployments.
+
+## Features
+
+- **Complete EKS cluster setup** with managed node groups
+- **VPC configuration** with public/private subnets
+- **IAM roles and service accounts** with least-privilege access
+- **AWS Load Balancer Controller** for ingress management
+- **EBS and EFS CSI drivers** for persistent storage
+- **Cluster autoscaler** for automatic node scaling
+- **External Secrets Operator** integration with AWS Secrets Manager
+- **Helm chart deployment** with production-ready configuration
+- **Monitoring and observability** setup
+
+## Prerequisites
+
+### Tools
+
+- **Node.js** (v18+)
+- **AWS CLI** configured with appropriate credentials
+- **AWS CDK** (v2.80+)
+- **kubectl** (for cluster management)
+- **Helm** (v3.8+)
+
+### AWS Permissions
+
+Your AWS credentials need the following permissions:
+- EKS cluster creation and management
+- VPC and networking resources
+- IAM role creation and management
+- Secrets Manager access
+- EC2 instance management
+
+## Quick Start
+
+### 1. Install Dependencies
+
+```bash
+cd deploy/aws-cdk
+npm install
+```
+
+### 2. Configure AWS CLI
+
+```bash
+aws configure
+# or use AWS SSO
+aws sso login --profile your-profile
+```
+
+### 3. Bootstrap CDK (First Time Only)
+
+```bash
+# Bootstrap CDK in your AWS account/region
+npx cdk bootstrap
+
+# If using a specific profile
+npx cdk bootstrap --profile your-profile
+```
+
+### 4. Deploy Development Environment
+
+```bash
+# Deploy with development configuration
+npx cdk deploy \
+  --context @cdk-dev.json \
+  --require-approval never
+
+# Or specify individual parameters
+npx cdk deploy \
+  --context stackName=LlmProxyEksDev \
+  --context clusterName=llm-proxy-dev \
+  --context environment=dev
+```
+
+### 5. Configure kubectl
+
+```bash
+# Update kubeconfig
+aws eks update-kubeconfig --region us-west-2 --name llm-proxy-dev
+
+# Verify connectivity
+kubectl get nodes
+kubectl get pods -n llm-proxy
+```
+
+## Configuration
+
+### Context Parameters
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `stackName` | CloudFormation stack name | `LlmProxyEks` |
+| `clusterName` | EKS cluster name | `llm-proxy-cluster` |
+| `namespace` | Kubernetes namespace | `llm-proxy` |
+| `environment` | Environment tag | `dev` |
+| `helmChartPath` | Path to Helm chart | `../../helm/llm-proxy` |
+| `helmValues` | Helm chart values override | `{}` |
+
+### Example Deployments
+
+#### Development Environment
+
+```bash
+npx cdk deploy --context @examples/cdk-dev.json
+```
+
+#### Production Environment
+
+```bash
+npx cdk deploy --context @examples/cdk-prod.json
+```
+
+#### Custom Configuration
+
+```bash
+npx cdk deploy \
+  --context stackName=MyLlmProxy \
+  --context clusterName=my-cluster \
+  --context environment=staging \
+  --context 'helmValues={"autoscaling":{"enabled":true}}'
+```
+
+## Infrastructure Components
+
+### EKS Cluster
+
+- **Kubernetes Version**: 1.28 (configurable)
+- **Endpoint Access**: Public and private
+- **Node Groups**: Managed node groups with auto-scaling
+- **Add-ons**: AWS Load Balancer Controller, EBS CSI, EFS CSI, Cluster Autoscaler
+
+### Networking
+
+- **VPC**: Multi-AZ with public and private subnets
+- **Security Groups**: Least-privilege access rules
+- **Network Policies**: Optional pod-to-pod communication control
+
+### Storage
+
+- **EBS CSI Driver**: For persistent volume claims
+- **EFS CSI Driver**: For shared storage (if needed)
+- **Storage Classes**: gp3 for high-performance storage
+
+### Security
+
+- **IAM Roles for Service Accounts (IRSA)**: Fine-grained permissions
+- **AWS Secrets Manager**: Secure secrets storage
+- **External Secrets Operator**: Kubernetes secrets sync
+- **Pod Security Standards**: Security contexts and policies
+
+### Monitoring
+
+- **Metrics Server**: Resource metrics collection
+- **CloudWatch Integration**: Logs and metrics forwarding
+- **Prometheus Integration**: ServiceMonitor and PodMonitor resources
+
+## Secrets Management
+
+### AWS Secrets Manager
+
+The CDK stack automatically creates and manages secrets:
+
+```bash
+# View created secrets
+aws secretsmanager list-secrets --query 'SecretList[?contains(Name, `llm-proxy`)]'
+
+# Get management token
+aws secretsmanager get-secret-value \
+  --secret-id llm-proxy/management-token \
+  --query SecretString --output text
+```
+
+### External Secrets Operator
+
+The stack deploys External Secrets Operator to sync AWS secrets to Kubernetes:
+
+```yaml
+# Example ExternalSecret (automatically created)
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: llm-proxy-management-token
+  namespace: llm-proxy
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    name: aws-secrets-manager
+    kind: SecretStore
+  target:
+    name: llm-proxy-secrets
+  data:
+  - secretKey: management-token
+    remoteRef:
+      key: llm-proxy/management-token
+      property: token
+```
+
+## Custom Helm Values
+
+### Override via Context
+
+```bash
+npx cdk deploy --context 'helmValues={
+  "autoscaling": {"enabled": true, "minReplicas": 3},
+  "ingress": {"enabled": true, "hosts": [{"host": "my-domain.com"}]}
+}'
+```
+
+### Configuration File
+
+Create a custom configuration file:
+
+```json
+{
+  "stackName": "MyLlmProxy",
+  "helmValues": {
+    "image": {"tag": "v1.2.3"},
+    "resources": {
+      "limits": {"cpu": "2000m", "memory": "2Gi"}
+    },
+    "redis": {
+      "enabled": false,
+      "external": {"host": "my-redis.elasticache.aws"}
+    }
+  }
+}
+```
+
+## Operations
+
+### Cluster Management
+
+```bash
+# View cluster info
+kubectl cluster-info
+
+# Scale node groups
+aws eks update-nodegroup-config \
+  --cluster-name llm-proxy-cluster \
+  --nodegroup-name compute \
+  --scaling-config minSize=2,maxSize=20,desiredSize=5
+
+# Update cluster version
+aws eks update-cluster-version \
+  --name llm-proxy-cluster \
+  --kubernetes-version 1.29
+```
+
+### Application Management
+
+```bash
+# Check deployment status
+kubectl get deployment -n llm-proxy
+
+# View logs
+kubectl logs -n llm-proxy deployment/llm-proxy -f
+
+# Port forward for testing
+kubectl port-forward -n llm-proxy svc/llm-proxy 8080:8080
+
+# Run Helm tests
+helm test llm-proxy -n llm-proxy
+```
+
+### Monitoring
+
+```bash
+# View metrics
+kubectl top pods -n llm-proxy
+kubectl top nodes
+
+# Check autoscaler status
+kubectl logs -n kube-system deployment/cluster-autoscaler -f
+
+# View ingress
+kubectl get ingress -n llm-proxy
+kubectl describe ingress -n llm-proxy
+```
+
+## Upgrade and Maintenance
+
+### CDK Stack Updates
+
+```bash
+# View changes
+npx cdk diff
+
+# Deploy updates
+npx cdk deploy --require-approval never
+
+# Rollback if needed
+npx cdk deploy --rollback
+```
+
+### Kubernetes Cluster Upgrades
+
+```bash
+# Update cluster control plane
+aws eks update-cluster-version \
+  --name llm-proxy-cluster \
+  --kubernetes-version 1.29
+
+# Update node groups
+aws eks update-nodegroup-version \
+  --cluster-name llm-proxy-cluster \
+  --nodegroup-name compute \
+  --kubernetes-version 1.29
+```
+
+### Application Updates
+
+```bash
+# Update Helm chart
+helm upgrade llm-proxy ../../helm/llm-proxy \
+  --namespace llm-proxy \
+  --values examples/values-production.yaml
+
+# Rollback application
+helm rollback llm-proxy 1 -n llm-proxy
+```
+
+## Cost Optimization
+
+### Development Environment
+
+- Use smaller instance types (t3.medium)
+- Disable Redis persistence
+- Single replica deployments
+- Spot instances for non-critical workloads
+
+### Production Environment
+
+- Use appropriate instance types (m5.large+)
+- Enable cluster autoscaler
+- Use reserved instances for baseline capacity
+- Monitor and optimize resource requests/limits
+
+## Troubleshooting
+
+### Common Issues
+
+#### EKS Cluster Access
+
+```bash
+# Update kubeconfig
+aws eks update-kubeconfig --region us-west-2 --name llm-proxy-cluster
+
+# Check AWS credentials
+aws sts get-caller-identity
+
+# Verify cluster status
+aws eks describe-cluster --name llm-proxy-cluster
+```
+
+#### Helm Deployment Issues
+
+```bash
+# Check Helm release
+helm status llm-proxy -n llm-proxy
+
+# View Helm values
+helm get values llm-proxy -n llm-proxy
+
+# Debug template rendering
+helm template llm-proxy ../../helm/llm-proxy --debug
+```
+
+#### Networking Issues
+
+```bash
+# Check security groups
+aws ec2 describe-security-groups \
+  --filters "Name=group-name,Values=*llm-proxy*"
+
+# Verify VPC endpoints
+aws ec2 describe-vpc-endpoints
+
+# Test DNS resolution
+kubectl run debug --image=busybox --rm -it -- nslookup kubernetes.default
+```
+
+### Cleanup
+
+```bash
+# Delete CDK stack
+npx cdk destroy
+
+# Verify cleanup
+aws cloudformation list-stacks \
+  --query 'StackSummaries[?contains(StackName, `LlmProxy`)]'
+
+# Manual cleanup (if needed)
+aws eks delete-cluster --name llm-proxy-cluster
+```
+
+## Security Considerations
+
+### Network Security
+
+- Private node groups by default
+- Network policies for pod-to-pod communication
+- Security groups with minimal required access
+- VPC endpoints for AWS services
+
+### Access Control
+
+- RBAC with least-privilege service accounts
+- IRSA for AWS service access
+- External secrets for sensitive data
+- Pod security contexts and standards
+
+### Monitoring
+
+- CloudTrail for API auditing
+- VPC Flow Logs for network monitoring
+- CloudWatch for application logs
+- Security scanning with tools like Falco
+
+## Support
+
+For issues and questions:
+- **GitHub Issues**: https://github.com/sofatutor/llm-proxy/issues
+- **AWS Documentation**: https://docs.aws.amazon.com/eks/
+- **CDK Documentation**: https://docs.aws.amazon.com/cdk/
\ No newline at end of file
diff --git a/deploy/aws-cdk/cdk.json b/deploy/aws-cdk/cdk.json
new file mode 100644
index 00000000..237be7dc
--- /dev/null
+++ b/deploy/aws-cdk/cdk.json
@@ -0,0 +1,62 @@
+{
+  "app": "npx ts-node --prefer-ts-exts src/app.ts",
+  "watch": {
+    "include": [
+      "**"
+    ],
+    "exclude": [
+      "README.md",
+      "cdk*.json",
+      "**/*.d.ts",
+      "**/*.js",
+      "tsconfig.json",
+      "package*.json",
+      "yarn.lock",
+      "node_modules",
+      "test"
+    ]
+  },
+  "context": {
+    "@aws-cdk/aws-lambda:recognizeLayerVersion": true,
+    "@aws-cdk/core:checkSecretUsage": true,
+    "@aws-cdk/core:target-partitions": [
+      "aws",
+      "aws-cn"
+    ],
+    "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
+    "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
+    "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
+    "@aws-cdk/aws-iam:minimizePolicies": true,
+    "@aws-cdk/core:validateSnapshotRemovalPolicy": true,
+    "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
+    "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
+    "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
+    "@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
+    "@aws-cdk/core:enablePartitionLiterals": true,
+    "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true,
+    "@aws-cdk/aws-iam:standardizedServicePrincipals": true,
+    "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true,
+    "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true,
+    "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true,
+    "@aws-cdk/aws-route53-patters:useCertificate": true,
+    "@aws-cdk/customresources:installLatestAwsSdkDefault": false,
+    "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true,
+    "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true,
+    "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true,
+    "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true,
+    "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true,
+    "@aws-cdk/aws-redshift:columnId": true,
+    "@aws-cdk/aws-stepfunctions-tasks:enableLoggingConfigurationForLambdaInvoke": true,
+    "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true,
+    "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true,
+    "@aws-cdk/aws-kms:aliasNameRef": true,
+    "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true,
+    "@aws-cdk/core:includePrefixInUniqueNameGeneration": true,
+    "@aws-cdk/aws-efs:denyAnonymousAccess": true,
+    "@aws-cdk/aws-opensearchservice:enableLogging": true,
+    "@aws-cdk/aws-lambda:useLatestRuntimeVersion": true,
+    "@aws-cdk/aws-ecs:removeDefaultDeploymentAlarm": true,
+    "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true,
+    "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForSourceAction": true
+  }
+}
\ No newline at end of file
diff --git a/deploy/aws-cdk/examples/cdk-dev.json b/deploy/aws-cdk/examples/cdk-dev.json
new file mode 100644
index 00000000..e8d44f54
--- /dev/null
+++ b/deploy/aws-cdk/examples/cdk-dev.json
@@ -0,0 +1,64 @@
+# Development environment deployment
+# Use this for development and testing environments
+
+{
+  "stackName": "LlmProxyEksDev",
+  "clusterName": "llm-proxy-dev",
+  "namespace": "llm-proxy",
+  "environment": "dev",
+  "helmChartPath": "../../helm/llm-proxy",
+  "helmValues": {
+    "image": {
+      "tag": "main"
+    },
+    "resources": {
+      "limits": {
+        "cpu": "500m",
+        "memory": "256Mi"
+      },
+      "requests": {
+        "cpu": "50m",
+        "memory": "64Mi"
+      }
+    },
+    "redis": {
+      "enabled": true,
+      "auth": {
+        "enabled": false
+      },
+      "master": {
+        "persistence": {
+          "enabled": false
+        }
+      },
+      "replica": {
+        "replicaCount": 0
+      }
+    },
+    "dispatcher": {
+      "services": {
+        "file": {
+          "enabled": true
+        },
+        "helicone": {
+          "enabled": false
+        }
+      }
+    },
+    "autoscaling": {
+      "enabled": false
+    },
+    "ingress": {
+      "enabled": true,
+      "annotations": {
+        "alb.ingress.kubernetes.io/scheme": "internal"
+      }
+    },
+    "serviceMonitor": {
+      "enabled": false
+    },
+    "networkPolicy": {
+      "enabled": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/deploy/aws-cdk/examples/cdk-prod.json b/deploy/aws-cdk/examples/cdk-prod.json
new file mode 100644
index 00000000..9d006fe8
--- /dev/null
+++ b/deploy/aws-cdk/examples/cdk-prod.json
@@ -0,0 +1,139 @@
+# Production environment deployment
+# Use this for production environments with high availability and security
+
+{
+  "stackName": "LlmProxyEksProd",
+  "clusterName": "llm-proxy-prod",
+  "namespace": "llm-proxy",
+  "environment": "prod",
+  "helmChartPath": "../../helm/llm-proxy",
+  "helmValues": {
+    "image": {
+      "tag": "v1.0.0"
+    },
+    "resources": {
+      "limits": {
+        "cpu": "2000m",
+        "memory": "1Gi"
+      },
+      "requests": {
+        "cpu": "500m",
+        "memory": "512Mi"
+      }
+    },
+    "redis": {
+      "enabled": false,
+      "external": {
+        "host": "llm-proxy-redis.cache.amazonaws.com",
+        "port": 6379
+      }
+    },
+    "config": {
+      "database": {
+        "type": "postgresql",
+        "postgresql": {
+          "host": "llm-proxy-postgres.rds.amazonaws.com",
+          "port": 5432,
+          "user": "llmproxy",
+          "database": "llmproxy",
+          "sslmode": "require"
+        }
+      }
+    },
+    "dispatcher": {
+      "enabled": true,
+      "replicaCount": 2,
+      "services": {
+        "file": {
+          "enabled": true
+        },
+        "helicone": {
+          "enabled": true
+        }
+      }
+    },
+    "autoscaling": {
+      "enabled": true,
+      "minReplicas": 3,
+      "maxReplicas": 20,
+      "targetCPUUtilizationPercentage": 70,
+      "targetMemoryUtilizationPercentage": 80
+    },
+    "podDisruptionBudget": {
+      "enabled": true,
+      "minAvailable": 2
+    },
+    "ingress": {
+      "enabled": true,
+      "className": "alb",
+      "annotations": {
+        "alb.ingress.kubernetes.io/scheme": "internet-facing",
+        "alb.ingress.kubernetes.io/ssl-redirect": "443",
+        "alb.ingress.kubernetes.io/certificate-arn": "arn:aws:acm:us-west-2:ACCOUNT:certificate/CERT-ID"
+      },
+      "hosts": [
+        {
+          "host": "llm-proxy.example.com",
+          "paths": [
+            {
+              "path": "/",
+              "pathType": "Prefix"
+            }
+          ]
+        }
+      ]
+    },
+    "serviceMonitor": {
+      "enabled": true,
+      "labels": {
+        "release": "prometheus-operator"
+      }
+    },
+    "networkPolicy": {
+      "enabled": true
+    },
+    "secrets": {
+      "create": false,
+      "external": true,
+      "externalSecrets": {
+        "managementToken": "llm-proxy-secrets",
+        "openaiApiKey": "llm-proxy-secrets",
+        "databasePassword": "llm-proxy-secrets",
+        "redisPassword": "llm-proxy-secrets"
+      }
+    },
+    "affinity": {
+      "podAntiAffinity": {
+        "preferredDuringSchedulingIgnoredDuringExecution": [
+          {
+            "weight": 100,
+            "podAffinityTerm": {
+              "labelSelector": {
+                "matchExpressions": [
+                  {
+                    "key": "app.kubernetes.io/name",
+                    "operator": "In",
+                    "values": ["llm-proxy"]
+                  }
+                ]
+              },
+              "topologyKey": "kubernetes.io/hostname"
+            }
+          }
+        ]
+      }
+    },
+    "topologySpreadConstraints": [
+      {
+        "maxSkew": 1,
+        "topologyKey": "topology.kubernetes.io/zone",
+        "whenUnsatisfiable": "DoNotSchedule",
+        "labelSelector": {
+          "matchLabels": {
+            "app.kubernetes.io/name": "llm-proxy"
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/deploy/aws-cdk/lib/llm-proxy-eks-stack.ts b/deploy/aws-cdk/lib/llm-proxy-eks-stack.ts
new file mode 100644
index 00000000..6c851a38
--- /dev/null
+++ b/deploy/aws-cdk/lib/llm-proxy-eks-stack.ts
@@ -0,0 +1,403 @@
+import * as cdk from 'aws-cdk-lib';
+import * as ec2 from 'aws-cdk-lib/aws-ec2';
+import * as eks from 'aws-cdk-lib/aws-eks';
+import * as iam from 'aws-cdk-lib/aws-iam';
+import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
+import { Construct } from 'constructs';
+
+export interface LlmProxyEksStackProps extends cdk.StackProps {
+  clusterName: string;
+  namespace: string;
+  helmChart: {
+    chartPath: string;
+    values: Record<string, any>;
+  };
+  vpc?: {
+    maxAzs: number;
+    natGateways: number;
+  };
+  eks?: {
+    version: eks.KubernetesVersion;
+    nodeGroups: Array<{
+      name: string;
+      instanceTypes: ec2.InstanceType[];
+      minSize: number;
+      maxSize: number;
+      desiredSize: number;
+    }>;
+  };
+  addOns?: {
+    awsLoadBalancerController?: boolean;
+    efsCSIDriver?: boolean;
+    ebsCSIDriver?: boolean;
+    clusterAutoscaler?: boolean;
+    metricsServer?: boolean;
+  };
+}
+
+export class LlmProxyEksStack extends cdk.Stack {
+  public readonly cluster: eks.Cluster;
+  public readonly namespace: eks.KubernetesNamespace;
+
+  constructor(scope: Construct, id: string, props: LlmProxyEksStackProps) {
+    super(scope, id, props);
+
+    // Create VPC
+    const vpc = new ec2.Vpc(this, 'LlmProxyVpc', {
+      maxAzs: props.vpc?.maxAzs || 3,
+      natGateways: props.vpc?.natGateways || 2,
+      subnetConfiguration: [
+        {
+          cidrMask: 24,
+          name: 'public',
+          subnetType: ec2.SubnetType.PUBLIC,
+        },
+        {
+          cidrMask: 24,
+          name: 'private',
+          subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
+        },
+      ],
+    });
+
+    // Create IAM role for EKS cluster
+    const clusterRole = new iam.Role(this, 'LlmProxyClusterRole', {
+      assumedBy: new iam.ServicePrincipal('eks.amazonaws.com'),
+      managedPolicies: [
+        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEKSClusterPolicy'),
+      ],
+    });
+
+    // Create IAM role for node groups
+    const nodeGroupRole = new iam.Role(this, 'LlmProxyNodeGroupRole', {
+      assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
+      managedPolicies: [
+        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEKSWorkerNodePolicy'),
+        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEKS_CNI_Policy'),
+        iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonEC2ContainerRegistryReadOnly'),
+      ],
+    });
+
+    // Create EKS cluster
+    this.cluster = new eks.Cluster(this, 'LlmProxyCluster', {
+      clusterName: props.clusterName,
+      version: props.eks?.version || eks.KubernetesVersion.V1_28,
+      vpc,
+      role: clusterRole,
+      defaultCapacity: 0, // We'll add managed node groups separately
+      endpointAccess: eks.EndpointAccess.PUBLIC_AND_PRIVATE,
+      outputClusterName: true,
+      outputConfigCommand: true,
+      outputMastersRoleArn: true,
+    });
+
+    // Add managed node groups
+    props.eks?.nodeGroups?.forEach((nodeGroupConfig, index) => {
+      this.cluster.addNodegroupCapacity(`NodeGroup${index}`, {
+        nodegroupName: nodeGroupConfig.name,
+        instanceTypes: nodeGroupConfig.instanceTypes,
+        minSize: nodeGroupConfig.minSize,
+        maxSize: nodeGroupConfig.maxSize,
+        desiredSize: nodeGroupConfig.desiredSize,
+        nodeRole: nodeGroupRole,
+        subnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
+        amiType: eks.NodegroupAmiType.AL2_X86_64,
+        capacityType: eks.CapacityType.ON_DEMAND,
+        diskSize: 50,
+        tags: {
+          'kubernetes.io/cluster-autoscaler/enabled': 'true',
+          [`kubernetes.io/cluster-autoscaler/${props.clusterName}`]: 'owned',
+        },
+      });
+    });
+
+    // Install add-ons
+    if (props.addOns?.awsLoadBalancerController) {
+      this.addAwsLoadBalancerController();
+    }
+
+    if (props.addOns?.efsCSIDriver) {
+      this.addEfsCSIDriver();
+    }
+
+    if (props.addOns?.ebsCSIDriver) {
+      this.addEbsCSIDriver();
+    }
+
+    if (props.addOns?.clusterAutoscaler) {
+      this.addClusterAutoscaler(props.clusterName);
+    }
+
+    if (props.addOns?.metricsServer) {
+      this.addMetricsServer();
+    }
+
+    // Create namespace for LLM Proxy
+    this.namespace = this.cluster.addManifest('LlmProxyNamespace', {
+      apiVersion: 'v1',
+      kind: 'Namespace',
+      metadata: {
+        name: props.namespace,
+        labels: {
+          'app.kubernetes.io/name': 'llm-proxy',
+          'app.kubernetes.io/managed-by': 'cdk',
+        },
+      },
+    });
+
+    // Create secrets for LLM Proxy
+    this.createSecrets(props.namespace);
+
+    // Create IRSA for LLM Proxy
+    const serviceAccount = this.createServiceAccount(props.namespace);
+
+    // Deploy Helm chart
+    this.deployHelmChart(props);
+
+    // Output important information
+    new cdk.CfnOutput(this, 'ClusterName', {
+      value: this.cluster.clusterName,
+      description: 'EKS Cluster Name',
+    });
+
+    new cdk.CfnOutput(this, 'ClusterEndpoint', {
+      value: this.cluster.clusterEndpoint,
+      description: 'EKS Cluster Endpoint',
+    });
+
+    new cdk.CfnOutput(this, 'KubectlCommand', {
+      value: `aws eks update-kubeconfig --region ${this.region} --name ${this.cluster.clusterName}`,
+      description: 'Command to configure kubectl',
+    });
+
+    new cdk.CfnOutput(this, 'LlmProxyNamespace', {
+      value: props.namespace,
+      description: 'LLM Proxy Kubernetes Namespace',
+    });
+  }
+
+  private addAwsLoadBalancerController(): void {
+    // Add AWS Load Balancer Controller
+    this.cluster.addHelmChart('AwsLoadBalancerController', {
+      chart: 'aws-load-balancer-controller',
+      repository: 'https://aws.github.io/eks-charts',
+      namespace: 'kube-system',
+      values: {
+        clusterName: this.cluster.clusterName,
+        serviceAccount: {
+          create: false,
+          name: 'aws-load-balancer-controller',
+        },
+      },
+    });
+
+    // Create IRSA for AWS Load Balancer Controller
+    const albServiceAccount = this.cluster.addServiceAccount('aws-load-balancer-controller', {
+      name: 'aws-load-balancer-controller',
+      namespace: 'kube-system',
+    });
+
+    albServiceAccount.role.addManagedPolicy(
+      iam.ManagedPolicy.fromAwsManagedPolicyName('ElasticLoadBalancingFullAccess')
+    );
+  }
+
+  private addEfsCSIDriver(): void {
+    this.cluster.addHelmChart('EfsCSIDriver', {
+      chart: 'aws-efs-csi-driver',
+      repository: 'https://kubernetes-sigs.github.io/aws-efs-csi-driver',
+      namespace: 'kube-system',
+    });
+  }
+
+  private addEbsCSIDriver(): void {
+    this.cluster.addHelmChart('EbsCSIDriver', {
+      chart: 'aws-ebs-csi-driver',
+      repository: 'https://kubernetes-sigs.github.io/aws-ebs-csi-driver',
+      namespace: 'kube-system',
+    });
+  }
+
+  private addClusterAutoscaler(clusterName: string): void {
+    this.cluster.addHelmChart('ClusterAutoscaler', {
+      chart: 'cluster-autoscaler',
+      repository: 'https://kubernetes.github.io/autoscaler',
+      namespace: 'kube-system',
+      values: {
+        autoDiscovery: {
+          clusterName: clusterName,
+        },
+        awsRegion: this.region,
+      },
+    });
+  }
+
+  private addMetricsServer(): void {
+    this.cluster.addHelmChart('MetricsServer', {
+      chart: 'metrics-server',
+      repository: 'https://kubernetes-sigs.github.io/metrics-server',
+      namespace: 'kube-system',
+    });
+  }
+
+  private createSecrets(namespace: string): void {
+    // Create AWS Secrets Manager secret for management token
+    const managementTokenSecret = new secretsmanager.Secret(this, 'LlmProxyManagementToken', {
+      secretName: `llm-proxy/management-token`,
+      description: 'LLM Proxy Management Token',
+      generateSecretString: {
+        secretStringTemplate: JSON.stringify({}),
+        generateStringKey: 'token',
+        excludeCharacters: '"@/\\\'',
+        passwordLength: 32,
+      },
+    });
+
+    // Create External Secret for management token
+    this.cluster.addManifest('LlmProxyManagementTokenExternalSecret', {
+      apiVersion: 'external-secrets.io/v1beta1',
+      kind: 'ExternalSecret',
+      metadata: {
+        name: 'llm-proxy-management-token',
+        namespace: namespace,
+      },
+      spec: {
+        refreshInterval: '1h',
+        secretStoreRef: {
+          name: 'aws-secrets-manager',
+          kind: 'SecretStore',
+        },
+        target: {
+          name: 'llm-proxy-secrets',
+          creationPolicy: 'Owner',
+        },
+        data: [
+          {
+            secretKey: 'management-token',
+            remoteRef: {
+              key: managementTokenSecret.secretName,
+              property: 'token',
+            },
+          },
+        ],
+      },
+    });
+
+    // Create SecretStore for AWS Secrets Manager
+    this.cluster.addManifest('AwsSecretsManagerStore', {
+      apiVersion: 'external-secrets.io/v1beta1',
+      kind: 'SecretStore',
+      metadata: {
+        name: 'aws-secrets-manager',
+        namespace: namespace,
+      },
+      spec: {
+        provider: {
+          aws: {
+            service: 'SecretsManager',
+            region: this.region,
+            auth: {
+              jwt: {
+                serviceAccountRef: {
+                  name: 'llm-proxy',
+                },
+              },
+            },
+          },
+        },
+      },
+    });
+  }
+
+  private createServiceAccount(namespace: string): eks.ServiceAccount {
+    // Create IRSA for LLM Proxy with necessary permissions
+    const serviceAccount = this.cluster.addServiceAccount('llm-proxy', {
+      name: 'llm-proxy',
+      namespace: namespace,
+    });
+
+    // Add permissions for Secrets Manager
+    serviceAccount.role.addToPolicy(new iam.PolicyStatement({
+      effect: iam.Effect.ALLOW,
+      actions: [
+        'secretsmanager:GetSecretValue',
+        'secretsmanager:DescribeSecret',
+      ],
+      resources: [`arn:aws:secretsmanager:${this.region}:${this.account}:secret:llm-proxy/*`],
+    }));
+
+    // Add permissions for CloudWatch (if needed for logging)
+    serviceAccount.role.addToPolicy(new iam.PolicyStatement({
+      effect: iam.Effect.ALLOW,
+      actions: [
+        'logs:CreateLogGroup',
+        'logs:CreateLogStream',
+        'logs:PutLogEvents',
+        'logs:DescribeLogStreams',
+      ],
+      resources: [`arn:aws:logs:${this.region}:${this.account}:log-group:/aws/llm-proxy/*`],
+    }));
+
+    return serviceAccount;
+  }
+
+  private deployHelmChart(props: LlmProxyEksStackProps): void {
+    // Deploy LLM Proxy Helm chart
+    const helmChart = this.cluster.addHelmChart('LlmProxy', {
+      chart: 'llm-proxy',
+      chartAsset: cdk.aws_s3_assets.Asset.fromAsset(props.helmChart.chartPath),
+      namespace: props.namespace,
+      timeout: cdk.Duration.minutes(10),
+      wait: true,
+      values: {
+        // Default values
+        image: {
+          repository: 'ghcr.io/sofatutor/llm-proxy',
+          tag: 'latest',
+        },
+        serviceAccount: {
+          create: false,
+          name: 'llm-proxy',
+        },
+        secrets: {
+          create: false,
+          external: true,
+          externalSecrets: {
+            managementToken: 'llm-proxy-secrets',
+          },
+        },
+        ingress: {
+          enabled: true,
+          className: 'alb',
+          annotations: {
+            'kubernetes.io/ingress.class': 'alb',
+            'alb.ingress.kubernetes.io/scheme': 'internet-facing',
+            'alb.ingress.kubernetes.io/target-type': 'ip',
+            'alb.ingress.kubernetes.io/healthcheck-path': '/health',
+            'alb.ingress.kubernetes.io/ssl-redirect': '443',
+          },
+        },
+        autoscaling: {
+          enabled: true,
+          minReplicas: 2,
+          maxReplicas: 10,
+        },
+        podDisruptionBudget: {
+          enabled: true,
+          minAvailable: 1,
+        },
+        serviceMonitor: {
+          enabled: true,
+        },
+        networkPolicy: {
+          enabled: true,
+        },
+        // Merge with custom values
+        ...props.helmChart.values,
+      },
+    });
+
+    // Add dependency on namespace and service account
+    helmChart.node.addDependency(this.namespace);
+  }
+}
\ No newline at end of file
diff --git a/deploy/aws-cdk/package.json b/deploy/aws-cdk/package.json
new file mode 100644
index 00000000..248082b6
--- /dev/null
+++ b/deploy/aws-cdk/package.json
@@ -0,0 +1,38 @@
+{
+  "name": "llm-proxy-eks-cdk",
+  "version": "1.0.0",
+  "description": "AWS CDK for deploying LLM Proxy to EKS",
+  "main": "lib/index.js",
+  "scripts": {
+    "build": "tsc",
+    "watch": "tsc -w",
+    "test": "jest",
+    "cdk": "cdk",
+    "synth": "cdk synth",
+    "deploy": "cdk deploy",
+    "diff": "cdk diff",
+    "destroy": "cdk destroy"
+  },
+  "devDependencies": {
+    "@types/jest": "^29.5.0",
+    "@types/node": "^18.15.0",
+    "jest": "^29.5.0",
+    "ts-jest": "^29.1.0",
+    "typescript": "^5.0.0"
+  },
+  "dependencies": {
+    "aws-cdk-lib": "^2.80.0",
+    "@aws-cdk/aws-eks": "^1.204.0",
+    "constructs": "^10.2.0"
+  },
+  "keywords": [
+    "aws",
+    "cdk",
+    "eks",
+    "kubernetes",
+    "llm-proxy",
+    "helm"
+  ],
+  "author": "sofatutor",
+  "license": "MIT"
+}
\ No newline at end of file
diff --git a/deploy/aws-cdk/src/app.ts b/deploy/aws-cdk/src/app.ts
new file mode 100644
index 00000000..be291c44
--- /dev/null
+++ b/deploy/aws-cdk/src/app.ts
@@ -0,0 +1,64 @@
+#!/usr/bin/env node
+import 'source-map-support/register';
+import * as cdk from 'aws-cdk-lib';
+import { LlmProxyEksStack } from '../lib/llm-proxy-eks-stack';
+
+const app = new cdk.App();
+
+// Get configuration from context or environment
+const env = {
+  account: process.env.CDK_DEFAULT_ACCOUNT,
+  region: process.env.CDK_DEFAULT_REGION || 'us-west-2',
+};
+
+const stackName = app.node.tryGetContext('stackName') || 'LlmProxyEks';
+const clusterName = app.node.tryGetContext('clusterName') || 'llm-proxy-cluster';
+const namespace = app.node.tryGetContext('namespace') || 'llm-proxy';
+
+new LlmProxyEksStack(app, stackName, {
+  env,
+  clusterName,
+  namespace,
+  description: 'EKS cluster with LLM Proxy Helm chart deployment',
+  
+  // Additional configuration from context
+  helmChart: {
+    chartPath: app.node.tryGetContext('helmChartPath') || '../../helm/llm-proxy',
+    values: app.node.tryGetContext('helmValues') || {},
+  },
+  
+  // VPC configuration
+  vpc: {
+    maxAzs: 3,
+    natGateways: 2,
+  },
+  
+  // EKS configuration
+  eks: {
+    version: cdk.aws_eks.KubernetesVersion.V1_28,
+    nodeGroups: [
+      {
+        name: 'compute',
+        instanceTypes: [cdk.aws_ec2.InstanceType.of(cdk.aws_ec2.InstanceClass.M5, cdk.aws_ec2.InstanceSize.LARGE)],
+        minSize: 2,
+        maxSize: 10,
+        desiredSize: 3,
+      },
+    ],
+  },
+  
+  // Add-ons
+  addOns: {
+    awsLoadBalancerController: true,
+    efsCSIDriver: true,
+    ebsCSIDriver: true,
+    clusterAutoscaler: true,
+    metricsServer: true,
+  },
+  
+  tags: {
+    Project: 'LlmProxy',
+    Environment: app.node.tryGetContext('environment') || 'dev',
+    ManagedBy: 'CDK',
+  },
+});
\ No newline at end of file
diff --git a/deploy/aws-cdk/test/llm-proxy-eks-stack.test.ts b/deploy/aws-cdk/test/llm-proxy-eks-stack.test.ts
new file mode 100644
index 00000000..07fa8bbc
--- /dev/null
+++ b/deploy/aws-cdk/test/llm-proxy-eks-stack.test.ts
@@ -0,0 +1,163 @@
+import * as cdk from 'aws-cdk-lib';
+import { Template } from 'aws-cdk-lib/assertions';
+import { LlmProxyEksStack } from '../lib/llm-proxy-eks-stack';
+
+describe('LlmProxyEksStack', () => {
+  test('creates EKS cluster with proper configuration', () => {
+    const app = new cdk.App();
+    const stack = new LlmProxyEksStack(app, 'TestStack', {
+      clusterName: 'test-cluster',
+      namespace: 'test-namespace',
+      helmChart: {
+        chartPath: '/test/path',
+        values: {},
+      },
+    });
+
+    const template = Template.fromStack(stack);
+
+    // Verify EKS cluster is created
+    template.hasResourceProperties('AWS::EKS::Cluster', {
+      Name: 'test-cluster',
+    });
+
+    // Verify VPC is created
+    template.hasResourceProperties('AWS::EC2::VPC', {
+      CidrBlock: '10.0.0.0/16',
+    });
+
+    // Verify IAM roles are created
+    template.hasResourceProperties('AWS::IAM::Role', {
+      AssumeRolePolicyDocument: {
+        Statement: [
+          {
+            Effect: 'Allow',
+            Principal: {
+              Service: 'eks.amazonaws.com',
+            },
+            Action: 'sts:AssumeRole',
+          },
+        ],
+      },
+    });
+  });
+
+  test('creates proper security groups', () => {
+    const app = new cdk.App();
+    const stack = new LlmProxyEksStack(app, 'TestStack', {
+      clusterName: 'test-cluster',
+      namespace: 'test-namespace',
+      helmChart: {
+        chartPath: '/test/path',
+        values: {},
+      },
+    });
+
+    const template = Template.fromStack(stack);
+
+    // Verify security groups are created
+    template.resourceCountIs('AWS::EC2::SecurityGroup', 2); // Cluster + Control plane
+  });
+
+  test('creates node groups with correct configuration', () => {
+    const app = new cdk.App();
+    const stack = new LlmProxyEksStack(app, 'TestStack', {
+      clusterName: 'test-cluster',
+      namespace: 'test-namespace',
+      helmChart: {
+        chartPath: '/test/path',
+        values: {},
+      },
+      eks: {
+        version: cdk.aws_eks.KubernetesVersion.V1_28,
+        nodeGroups: [
+          {
+            name: 'compute',
+            instanceTypes: [cdk.aws_ec2.InstanceType.of(cdk.aws_ec2.InstanceClass.M5, cdk.aws_ec2.InstanceSize.LARGE)],
+            minSize: 2,
+            maxSize: 10,
+            desiredSize: 3,
+          },
+        ],
+      },
+    });
+
+    const template = Template.fromStack(stack);
+
+    // Verify node group is created
+    template.hasResourceProperties('AWS::EKS::Nodegroup', {
+      NodegroupName: 'compute',
+      ScalingConfig: {
+        MinSize: 2,
+        MaxSize: 10,
+        DesiredSize: 3,
+      },
+    });
+  });
+
+  test('creates secrets and IRSA correctly', () => {
+    const app = new cdk.App();
+    const stack = new LlmProxyEksStack(app, 'TestStack', {
+      clusterName: 'test-cluster',
+      namespace: 'test-namespace',
+      helmChart: {
+        chartPath: '/test/path',
+        values: {},
+      },
+    });
+
+    const template = Template.fromStack(stack);
+
+    // Verify Secrets Manager secret is created
+    template.hasResourceProperties('AWS::SecretsManager::Secret', {
+      Name: 'llm-proxy/management-token',
+    });
+
+    // Verify service account IAM role is created
+    template.hasResourceProperties('AWS::IAM::Role', {
+      AssumeRolePolicyDocument: {
+        Statement: [
+          {
+            Effect: 'Allow',
+            Condition: {
+              StringEquals: {
+                'aws:RequestedRegion': cdk.Aws.REGION,
+              },
+            },
+          },
+        ],
+      },
+    });
+  });
+
+  test('outputs essential information', () => {
+    const app = new cdk.App();
+    const stack = new LlmProxyEksStack(app, 'TestStack', {
+      clusterName: 'test-cluster',
+      namespace: 'test-namespace',
+      helmChart: {
+        chartPath: '/test/path',
+        values: {},
+      },
+    });
+
+    const template = Template.fromStack(stack);
+
+    // Verify outputs are created
+    template.hasOutput('ClusterName', {
+      Description: 'EKS Cluster Name',
+    });
+
+    template.hasOutput('ClusterEndpoint', {
+      Description: 'EKS Cluster Endpoint',
+    });
+
+    template.hasOutput('KubectlCommand', {
+      Description: 'Command to configure kubectl',
+    });
+
+    template.hasOutput('LlmProxyNamespace', {
+      Description: 'LLM Proxy Kubernetes Namespace',
+    });
+  });
+});
\ No newline at end of file
diff --git a/deploy/aws-cdk/tsconfig.json b/deploy/aws-cdk/tsconfig.json
new file mode 100644
index 00000000..66a022cd
--- /dev/null
+++ b/deploy/aws-cdk/tsconfig.json
@@ -0,0 +1,32 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "module": "commonjs",
+    "lib": ["ES2020"],
+    "declaration": true,
+    "strict": true,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "noImplicitThis": true,
+    "alwaysStrict": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noImplicitReturns": true,
+    "noFallthroughCasesInSwitch": false,
+    "inlineSourceMap": true,
+    "inlineSources": true,
+    "experimentalDecorators": true,
+    "strictPropertyInitialization": false,
+    "typeRoots": ["./node_modules/@types"],
+    "outDir": "./dist",
+    "rootDir": "./src"
+  },
+  "exclude": [
+    "node_modules",
+    "dist",
+    "**/*.test.ts"
+  ],
+  "include": [
+    "src/**/*"
+  ]
+}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/Chart.yaml b/deploy/helm/llm-proxy/Chart.yaml
new file mode 100644
index 00000000..2cfd2fdc
--- /dev/null
+++ b/deploy/helm/llm-proxy/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: llm-proxy
+description: A Helm chart for deploying LLM Proxy - transparent OpenAI-compatible reverse proxy with token management, rate limiting, and async events
+version: 0.1.0
+appVersion: "latest"
+type: application
+keywords:
+  - llm
+  - proxy
+  - openai
+  - api
+  - reverse-proxy
+  - token-management
+home: https://github.com/sofatutor/llm-proxy
+sources:
+  - https://github.com/sofatutor/llm-proxy
+maintainers:
+  - name: sofatutor
+    url: https://github.com/sofatutor
+dependencies:
+  - name: redis
+    version: "18.1.5"
+    repository: "https://charts.bitnami.com/bitnami"
+    condition: redis.enabled
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/examples/values-development.yaml b/deploy/helm/llm-proxy/examples/values-development.yaml
new file mode 100644
index 00000000..864b94f1
--- /dev/null
+++ b/deploy/helm/llm-proxy/examples/values-development.yaml
@@ -0,0 +1,100 @@
+# Example values for development environment
+# This configuration is suitable for local development and testing
+
+# Image configuration - use latest for development
+image:
+  tag: "main"  # Use main branch builds for development
+
+# Single replica for development
+replicaCount: 1
+
+# Minimal resources for development
+resources:
+  limits:
+    cpu: 500m
+    memory: 256Mi
+  requests:
+    cpu: 50m
+    memory: 64Mi
+
+# Development configuration
+config:
+  managementToken: "dev-management-token-change-me"
+  logLevel: "debug"
+  logFormat: "text"  # Easier to read in development
+  
+  # Security - relaxed for development
+  security:
+    corsAllowedOrigins: "*"
+    defaultTokenLifetime: "7d"  # Longer lifetime for convenience
+  
+  # Observability
+  observability:
+    enabled: true
+    bufferSize: 100  # Smaller buffer for development
+
+# Enable Redis for event bus testing
+redis:
+  enabled: true
+  auth:
+    enabled: false  # No auth for development
+  master:
+    persistence:
+      enabled: false  # No persistence needed for development
+  replica:
+    replicaCount: 0  # No replicas needed for development
+
+# Enable file dispatcher for local debugging
+dispatcher:
+  enabled: true
+  services:
+    file:
+      enabled: true
+      endpoint: "/app/logs/dev-events.jsonl"
+    helicone:
+      enabled: false
+
+# Admin UI enabled for development
+adminUI:
+  enabled: true
+
+# Disable autoscaling for development
+autoscaling:
+  enabled: false
+
+# Disable pod disruption budget for development
+podDisruptionBudget:
+  enabled: false
+
+# Simple storage for development
+persistence:
+  enabled: true
+  size: 1Gi
+  storageClass: ""  # Use default storage class
+
+# No ingress for development (use port-forward)
+ingress:
+  enabled: false
+
+# Disable monitoring for development
+serviceMonitor:
+  enabled: false
+podMonitor:
+  enabled: false
+
+# Disable network policies for development
+networkPolicy:
+  enabled: false
+
+# Health checks with shorter intervals for faster feedback
+healthChecks:
+  liveness:
+    initialDelaySeconds: 10
+    periodSeconds: 10
+  readiness:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+  startup:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    failureThreshold: 10
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/examples/values-production.yaml b/deploy/helm/llm-proxy/examples/values-production.yaml
new file mode 100644
index 00000000..668cedc6
--- /dev/null
+++ b/deploy/helm/llm-proxy/examples/values-production.yaml
@@ -0,0 +1,258 @@
+# Example values for production environment
+# This configuration is suitable for production deployment with high availability
+
+# Use specific stable image tag
+image:
+  repository: ghcr.io/sofatutor/llm-proxy
+  tag: "v1.0.0"  # Use specific version tag
+  pullPolicy: IfNotPresent
+
+# Service account with annotations for AWS IRSA (if using AWS)
+serviceAccount:
+  create: true
+  annotations:
+    # eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/llm-proxy-role
+
+# Production resources
+resources:
+  limits:
+    cpu: 2000m
+    memory: 1Gi
+  requests:
+    cpu: 500m
+    memory: 512Mi
+
+# Production configuration
+config:
+  # Use external secret for management token
+  managementToken: ""  # Provided via external secret
+  logLevel: "info"
+  logFormat: "json"
+  
+  # Database - use PostgreSQL for production
+  database:
+    type: "postgresql"
+    postgresql:
+      host: "llm-proxy-postgres.database.svc.cluster.local"
+      port: 5432
+      user: "llmproxy"
+      password: ""  # Provided via external secret
+      database: "llmproxy"
+      sslmode: "require"
+  
+  # Security - strict for production
+  security:
+    corsAllowedOrigins: "https://llm-proxy.example.com"
+    maskApiKeys: true
+    validateApiKeyFormat: true
+    defaultTokenLifetime: "30d"
+    defaultTokenRequestLimit: 5000
+  
+  # Rate limiting
+  rateLimiting:
+    globalRateLimit: 1000
+    ipRateLimit: 100
+  
+  # Performance tuning for production
+  performance:
+    maxConcurrentRequests: 500
+    workerPoolSize: 20
+  
+  # Monitoring enabled
+  monitoring:
+    enableMetrics: true
+  
+  # Observability
+  observability:
+    enabled: true
+    bufferSize: 10000
+
+# External Redis for production scalability
+redis:
+  enabled: false
+  external:
+    host: "llm-proxy-redis.cache.svc.cluster.local"
+    port: 6379
+    password: ""  # Provided via external secret
+
+# Dispatcher configuration for production
+dispatcher:
+  enabled: true
+  replicaCount: 2  # Multiple replicas for reliability
+  resources:
+    limits:
+      cpu: 1000m
+      memory: 512Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+  services:
+    file:
+      enabled: true
+      endpoint: "/app/logs/production-events.jsonl"
+    helicone:
+      enabled: true
+      apiKey: ""  # Provided via external secret
+
+# Admin UI enabled with secure configuration
+adminUI:
+  enabled: true
+  apiBaseUrl: "https://llm-proxy.example.com"
+
+# Enable autoscaling for production
+autoscaling:
+  enabled: true
+  minReplicas: 3
+  maxReplicas: 20
+  targetCPUUtilizationPercentage: 70
+  targetMemoryUtilizationPercentage: 80
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 300
+      policies:
+      - type: Percent
+        value: 10
+        periodSeconds: 60
+    scaleUp:
+      stabilizationWindowSeconds: 60
+      policies:
+      - type: Percent
+        value: 50
+        periodSeconds: 30
+
+# Pod disruption budget for high availability
+podDisruptionBudget:
+  enabled: true
+  minAvailable: 2
+
+# Production storage
+persistence:
+  enabled: true
+  size: 100Gi
+  storageClass: "gp3"  # Use high-performance storage
+  accessModes:
+    - ReadWriteOnce
+
+# Ingress configuration for production
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
+    cert-manager.io/cluster-issuer: "letsencrypt-prod"
+    nginx.ingress.kubernetes.io/rate-limit: "100"
+    nginx.ingress.kubernetes.io/rate-limit-window: "1m"
+  hosts:
+    - host: llm-proxy.example.com
+      paths:
+        - path: /
+          pathType: Prefix
+  tls:
+    - secretName: llm-proxy-tls
+      hosts:
+        - llm-proxy.example.com
+
+# Monitoring enabled for production
+serviceMonitor:
+  enabled: true
+  interval: 15s
+  labels:
+    release: prometheus-operator
+podMonitor:
+  enabled: true
+  interval: 15s
+  labels:
+    release: prometheus-operator
+
+# Network policies for security
+networkPolicy:
+  enabled: true
+  policyTypes:
+    - Ingress
+    - Egress
+
+# Production health checks
+healthChecks:
+  liveness:
+    enabled: true
+    initialDelaySeconds: 60
+    periodSeconds: 30
+    timeoutSeconds: 10
+    failureThreshold: 3
+  readiness:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 3
+  startup:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 60
+
+# Pod security context for production
+podSecurityContext:
+  fsGroup: 2000
+  runAsNonRoot: true
+  runAsUser: 1000
+  runAsGroup: 3000
+  seccompProfile:
+    type: RuntimeDefault
+
+# Container security context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+    - ALL
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 1000
+
+# Node selection for production workloads
+nodeSelector:
+  kubernetes.io/arch: amd64
+  node-type: compute
+
+# Tolerations for dedicated nodes (if using dedicated nodes)
+tolerations: []
+  # - key: "workload"
+  #   operator: "Equal"
+  #   value: "llm-proxy"
+  #   effect: "NoSchedule"
+
+# Anti-affinity for high availability
+affinity:
+  podAntiAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+    - weight: 100
+      podAffinityTerm:
+        labelSelector:
+          matchExpressions:
+          - key: app.kubernetes.io/name
+            operator: In
+            values:
+            - llm-proxy
+        topologyKey: kubernetes.io/hostname
+
+# Topology spread constraints for even distribution
+topologySpreadConstraints:
+  - maxSkew: 1
+    topologyKey: topology.kubernetes.io/zone
+    whenUnsatisfiable: DoNotSchedule
+    labelSelector:
+      matchLabels:
+        app.kubernetes.io/name: llm-proxy
+
+# Use external secrets for sensitive data
+secrets:
+  create: false
+  external: true
+  externalSecrets:
+    managementToken: "llm-proxy-management-token"
+    openaiApiKey: "llm-proxy-openai-key"
+    databasePassword: "llm-proxy-db-password"
+    redisPassword: "llm-proxy-redis-password"
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/NOTES.txt b/deploy/helm/llm-proxy/templates/NOTES.txt
new file mode 100644
index 00000000..c7b7119c
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/NOTES.txt
@@ -0,0 +1,75 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llm-proxy.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llm-proxy.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llm-proxy.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "{{ include "llm-proxy.selectorLabels" . }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
+
+2. Check the deployment status:
+  kubectl get pods --namespace {{ .Release.Namespace }} -l "{{ include "llm-proxy.selectorLabels" . }}"
+
+3. Get application logs:
+  kubectl logs --namespace {{ .Release.Namespace }} -l "{{ include "llm-proxy.selectorLabels" . }}" -f
+
+4. Check health status:
+{{- if .Values.ingress.enabled }}
+{{- $host := index .Values.ingress.hosts 0 }}
+  curl -f http{{ if .Values.ingress.tls }}s{{ end }}://{{ $host.host }}/health
+{{- else }}
+  kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ include "llm-proxy.fullname" . }} 8080:{{ .Values.service.port }}
+  curl -f http://localhost:8080/health
+{{- end }}
+
+5. Management API access:
+{{- if not .Values.config.managementToken }}
+   ⚠️  WARNING: No management token provided! 
+   You must set config.managementToken in your values or use an external secret.
+{{- else }}
+   Management token is configured. Use it for admin operations via:
+   - Management API: /manage/projects, /manage/tokens
+{{- if .Values.adminUI.enabled }}
+   - Admin UI: {{ .Values.adminUI.path }}
+{{- end }}
+{{- end }}
+
+6. {{- if .Values.redis.enabled }}Redis is deployed as a dependency.{{- else }}External Redis configuration: {{ include "llm-proxy.redisAddr" . }}{{- end }}
+
+{{- if .Values.dispatcher.enabled }}
+7. Event dispatchers are running:
+{{- if .Values.dispatcher.services.file.enabled }}
+   - File dispatcher: logs events to {{ .Values.dispatcher.services.file.endpoint }}
+{{- end }}
+{{- if .Values.dispatcher.services.helicone.enabled }}
+   - Helicone dispatcher: forwards events to Helicone
+{{- end }}
+{{- end }}
+
+{{- if .Values.autoscaling.enabled }}
+8. Horizontal Pod Autoscaler is enabled ({{ .Values.autoscaling.minReplicas }}-{{ .Values.autoscaling.maxReplicas }} replicas)
+{{- end }}
+
+{{- if .Values.persistence.enabled }}
+9. Persistent volumes are configured:
+   - Data: {{ .Values.persistence.dataPath }} ({{ .Values.persistence.size }})
+   - Logs: {{ .Values.persistence.logsPath }} ({{ .Values.persistence.size }})
+{{- end }}
+
+For more information and advanced configuration options, see:
+- GitHub: https://github.com/sofatutor/llm-proxy
+- Documentation: https://github.com/sofatutor/llm-proxy/tree/main/docs
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/_helpers.tpl b/deploy/helm/llm-proxy/templates/_helpers.tpl
new file mode 100644
index 00000000..96d654be
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/_helpers.tpl
@@ -0,0 +1,316 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "llm-proxy.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "llm-proxy.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "llm-proxy.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "llm-proxy.labels" -}}
+helm.sh/chart: {{ include "llm-proxy.chart" . }}
+{{ include "llm-proxy.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "llm-proxy.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "llm-proxy.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Dispatcher labels
+*/}}
+{{- define "llm-proxy.dispatcherLabels" -}}
+helm.sh/chart: {{ include "llm-proxy.chart" . }}
+{{ include "llm-proxy.dispatcherSelectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Dispatcher selector labels
+*/}}
+{{- define "llm-proxy.dispatcherSelectorLabels" -}}
+app.kubernetes.io/name: {{ include "llm-proxy.name" . }}-dispatcher
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: dispatcher
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "llm-proxy.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "llm-proxy.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create the image name
+*/}}
+{{- define "llm-proxy.image" -}}
+{{- $registry := .Values.global.imageRegistry | default "" }}
+{{- $repository := .Values.image.repository }}
+{{- $tag := .Values.image.tag | default .Chart.AppVersion }}
+{{- if $registry }}
+{{- printf "%s/%s:%s" $registry $repository $tag }}
+{{- else }}
+{{- printf "%s:%s" $repository $tag }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create the dispatcher image name
+*/}}
+{{- define "llm-proxy.dispatcherImage" -}}
+{{- $registry := .Values.global.imageRegistry | default "" }}
+{{- $repository := .Values.dispatcher.image.repository | default .Values.image.repository }}
+{{- $tag := .Values.dispatcher.image.tag | default (.Values.image.tag | default .Chart.AppVersion) }}
+{{- if $registry }}
+{{- printf "%s/%s:%s" $registry $repository $tag }}
+{{- else }}
+{{- printf "%s:%s" $repository $tag }}
+{{- end }}
+{{- end }}
+
+{{/*
+Redis connection settings
+*/}}
+{{- define "llm-proxy.redisAddr" -}}
+{{- if .Values.redis.enabled }}
+{{- printf "%s-redis-master:6379" .Release.Name }}
+{{- else }}
+{{- printf "%s:%d" .Values.redis.external.host (.Values.redis.external.port | int) }}
+{{- end }}
+{{- end }}
+
+{{/*
+Database connection string
+*/}}
+{{- define "llm-proxy.databaseUrl" -}}
+{{- if eq .Values.config.database.type "postgresql" }}
+{{- printf "postgres://%s:%s@%s:%d/%s?sslmode=%s" .Values.config.database.postgresql.user .Values.config.database.postgresql.password .Values.config.database.postgresql.host (.Values.config.database.postgresql.port | int) .Values.config.database.postgresql.database .Values.config.database.postgresql.sslmode }}
+{{- else }}
+{{- .Values.config.database.sqlite.path }}
+{{- end }}
+{{- end }}
+
+{{/*
+Secret name for management token
+*/}}
+{{- define "llm-proxy.managementTokenSecret" -}}
+{{- if .Values.secrets.external }}
+{{- .Values.secrets.externalSecrets.managementToken }}
+{{- else }}
+{{- printf "%s-secrets" (include "llm-proxy.fullname" .) }}
+{{- end }}
+{{- end }}
+
+{{/*
+Secret name for OpenAI API key
+*/}}
+{{- define "llm-proxy.openaiApiKeySecret" -}}
+{{- if .Values.secrets.external }}
+{{- .Values.secrets.externalSecrets.openaiApiKey }}
+{{- else }}
+{{- printf "%s-secrets" (include "llm-proxy.fullname" .) }}
+{{- end }}
+{{- end }}
+
+{{/*
+Secret name for Redis password
+*/}}
+{{- define "llm-proxy.redisPasswordSecret" -}}
+{{- if .Values.secrets.external }}
+{{- .Values.secrets.externalSecrets.redisPassword }}
+{{- else if .Values.redis.enabled }}
+{{- printf "%s-redis" .Release.Name }}
+{{- else }}
+{{- printf "%s-secrets" (include "llm-proxy.fullname" .) }}
+{{- end }}
+{{- end }}
+
+{{/*
+Common environment variables
+*/}}
+{{- define "llm-proxy.env" -}}
+- name: LISTEN_ADDR
+  value: {{ .Values.config.listenAddr | quote }}
+- name: LOG_LEVEL
+  value: {{ .Values.config.logLevel | quote }}
+- name: LOG_FORMAT
+  value: {{ .Values.config.logFormat | quote }}
+- name: MANAGEMENT_TOKEN
+  valueFrom:
+    secretKeyRef:
+      name: {{ include "llm-proxy.managementTokenSecret" . }}
+      key: management-token
+- name: DATABASE_PATH
+  {{- if eq .Values.config.database.type "postgresql" }}
+  value: {{ include "llm-proxy.databaseUrl" . | quote }}
+  {{- else }}
+  value: {{ .Values.config.database.sqlite.path | quote }}
+  {{- end }}
+- name: OPENAI_API_URL
+  value: {{ .Values.config.openai.apiUrl | quote }}
+- name: REQUEST_TIMEOUT
+  value: {{ .Values.config.openai.requestTimeout | quote }}
+- name: MAX_REQUEST_SIZE
+  value: {{ .Values.config.openai.maxRequestSize | quote }}
+- name: ENABLE_STREAMING
+  value: {{ .Values.config.openai.enableStreaming | quote }}
+- name: CORS_ALLOWED_ORIGINS
+  value: {{ .Values.config.security.corsAllowedOrigins | quote }}
+- name: CORS_ALLOWED_METHODS
+  value: {{ .Values.config.security.corsAllowedMethods | quote }}
+- name: CORS_ALLOWED_HEADERS
+  value: {{ .Values.config.security.corsAllowedHeaders | quote }}
+- name: CORS_MAX_AGE
+  value: {{ .Values.config.security.corsMaxAge | quote }}
+- name: MASK_API_KEYS
+  value: {{ .Values.config.security.maskApiKeys | quote }}
+- name: VALIDATE_API_KEY_FORMAT
+  value: {{ .Values.config.security.validateApiKeyFormat | quote }}
+- name: DEFAULT_TOKEN_LIFETIME
+  value: {{ .Values.config.security.defaultTokenLifetime | quote }}
+- name: DEFAULT_TOKEN_REQUEST_LIMIT
+  value: {{ .Values.config.security.defaultTokenRequestLimit | quote }}
+- name: GLOBAL_RATE_LIMIT
+  value: {{ .Values.config.rateLimiting.globalRateLimit | quote }}
+- name: IP_RATE_LIMIT
+  value: {{ .Values.config.rateLimiting.ipRateLimit | quote }}
+- name: MAX_CONCURRENT_REQUESTS
+  value: {{ .Values.config.performance.maxConcurrentRequests | quote }}
+- name: WORKER_POOL_SIZE
+  value: {{ .Values.config.performance.workerPoolSize | quote }}
+- name: ENABLE_METRICS
+  value: {{ .Values.config.monitoring.enableMetrics | quote }}
+- name: METRICS_PATH
+  value: {{ .Values.config.monitoring.metricsPath | quote }}
+- name: TOKEN_CLEANUP_INTERVAL
+  value: {{ .Values.config.tokenCleanupInterval | quote }}
+- name: OBSERVABILITY_ENABLED
+  value: {{ .Values.config.observability.enabled | quote }}
+- name: OBSERVABILITY_BUFFER_SIZE
+  value: {{ .Values.config.observability.bufferSize | quote }}
+- name: LLM_PROXY_EVENT_BUS
+  value: "redis"
+- name: REDIS_ADDR
+  value: {{ include "llm-proxy.redisAddr" . | quote }}
+{{- if .Values.adminUI.enabled }}
+- name: ADMIN_UI_ENABLED
+  value: "true"
+- name: ADMIN_UI_PATH
+  value: {{ .Values.adminUI.path | quote }}
+{{- if .Values.adminUI.apiBaseUrl }}
+- name: ADMIN_UI_API_BASE_URL
+  value: {{ .Values.adminUI.apiBaseUrl | quote }}
+{{- end }}
+{{- end }}
+{{- range .Values.env }}
+- name: {{ .name }}
+  {{- if .value }}
+  value: {{ .value | quote }}
+  {{- else if .valueFrom }}
+  valueFrom:
+    {{- toYaml .valueFrom | nindent 4 }}
+  {{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Volume mounts
+*/}}
+{{- define "llm-proxy.volumeMounts" -}}
+{{- if .Values.persistence.enabled }}
+- name: data
+  mountPath: {{ .Values.persistence.dataPath }}
+- name: logs
+  mountPath: {{ .Values.persistence.logsPath }}
+{{- end }}
+- name: tmp
+  mountPath: /tmp
+{{- range .Values.volumeMounts }}
+- name: {{ .name }}
+  mountPath: {{ .mountPath }}
+  {{- if .readOnly }}
+  readOnly: {{ .readOnly }}
+  {{- end }}
+  {{- if .subPath }}
+  subPath: {{ .subPath }}
+  {{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Volumes
+*/}}
+{{- define "llm-proxy.volumes" -}}
+{{- if .Values.persistence.enabled }}
+- name: data
+  persistentVolumeClaim:
+    claimName: {{ include "llm-proxy.fullname" . }}-data
+- name: logs
+  persistentVolumeClaim:
+    claimName: {{ include "llm-proxy.fullname" . }}-logs
+{{- end }}
+- name: tmp
+  emptyDir: {}
+{{- range .Values.volumes }}
+- name: {{ .name }}
+  {{- if .configMap }}
+  configMap:
+    name: {{ .configMap.name }}
+    {{- if .configMap.items }}
+    items:
+      {{- toYaml .configMap.items | nindent 6 }}
+    {{- end }}
+  {{- else if .secret }}
+  secret:
+    secretName: {{ .secret.secretName }}
+    {{- if .secret.items }}
+    items:
+      {{- toYaml .secret.items | nindent 6 }}
+    {{- end }}
+  {{- else if .emptyDir }}
+  emptyDir: {}
+  {{- end }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/configmap.yaml b/deploy/helm/llm-proxy/templates/configmap.yaml
new file mode 100644
index 00000000..7bc0105d
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/configmap.yaml
@@ -0,0 +1,58 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}-config
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+data:
+  # API Providers configuration
+  api_providers.yaml: |
+    # Default API provider configuration for OpenAI
+    providers:
+      openai:
+        name: "OpenAI"
+        base_url: {{ .Values.config.openai.apiUrl | quote }}
+        endpoints:
+          - path: "/v1/chat/completions"
+            methods: ["POST"]
+            streaming: true
+          - path: "/v1/completions"
+            methods: ["POST"]
+            streaming: true
+          - path: "/v1/embeddings"
+            methods: ["POST"]
+            streaming: false
+          - path: "/v1/models"
+            methods: ["GET"]
+            streaming: false
+          - path: "/v1/audio/transcriptions"
+            methods: ["POST"]
+            streaming: false
+          - path: "/v1/audio/translations"
+            methods: ["POST"]
+            streaming: false
+          - path: "/v1/images/generations"
+            methods: ["POST"]
+            streaming: false
+        auth:
+          type: "bearer"
+          header: "Authorization"
+        rate_limiting:
+          default_limit: {{ .Values.config.security.defaultTokenRequestLimit }}
+        timeouts:
+          request: {{ .Values.config.openai.requestTimeout | quote }}
+        security:
+          validate_content_type: true
+          max_request_size: {{ .Values.config.openai.maxRequestSize | quote }}
+  
+  # Health check configuration  
+  health.yaml: |
+    health_checks:
+      liveness:
+        path: {{ .Values.healthChecks.liveness.path | quote }}
+        interval: {{ .Values.healthChecks.liveness.periodSeconds }}s
+        timeout: {{ .Values.healthChecks.liveness.timeoutSeconds }}s
+      readiness:
+        path: {{ .Values.healthChecks.readiness.path | quote }}
+        interval: {{ .Values.healthChecks.readiness.periodSeconds }}s
+        timeout: {{ .Values.healthChecks.readiness.timeoutSeconds }}s
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/deployment-dispatcher.yaml b/deploy/helm/llm-proxy/templates/deployment-dispatcher.yaml
new file mode 100644
index 00000000..3fa7cf77
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/deployment-dispatcher.yaml
@@ -0,0 +1,176 @@
+{{- if .Values.dispatcher.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}-dispatcher
+  labels:
+    {{- include "llm-proxy.dispatcherLabels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.dispatcher.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "llm-proxy.dispatcherSelectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "llm-proxy.dispatcherSelectorLabels" . | nindent 8 }}
+    spec:
+      {{- with (concat .Values.global.imagePullSecrets .Values.imagePullSecrets) }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "llm-proxy.serviceAccountName" . }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        {{- if .Values.dispatcher.services.file.enabled }}
+        - name: file-dispatcher
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: {{ include "llm-proxy.dispatcherImage" . }}
+          imagePullPolicy: {{ .Values.dispatcher.image.pullPolicy | default .Values.image.pullPolicy }}
+          command: ["/app/entrypoint.sh"]
+          args: 
+            - "dispatcher"
+            - "--service"
+            - "file"
+            - "--endpoint"
+            - {{ .Values.dispatcher.services.file.endpoint | quote }}
+          env:
+            - name: LLM_PROXY_EVENT_BUS
+              value: "redis"
+            - name: REDIS_ADDR
+              value: {{ include "llm-proxy.redisAddr" . | quote }}
+            - name: LOG_LEVEL
+              value: {{ .Values.config.logLevel | quote }}
+            {{- range .Values.env }}
+            - name: {{ .name }}
+              {{- if .value }}
+              value: {{ .value | quote }}
+              {{- else if .valueFrom }}
+              valueFrom:
+                {{- toYaml .valueFrom | nindent 16 }}
+              {{- end }}
+            {{- end }}
+          {{- with .Values.envFrom }}
+          envFrom:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.dispatcher.resources | nindent 12 }}
+          volumeMounts:
+            {{- if .Values.persistence.enabled }}
+            - name: logs
+              mountPath: {{ .Values.persistence.logsPath }}
+            {{- end }}
+            - name: tmp
+              mountPath: /tmp
+            {{- range .Values.volumeMounts }}
+            - name: {{ .name }}
+              mountPath: {{ .mountPath }}
+              {{- if .readOnly }}
+              readOnly: {{ .readOnly }}
+              {{- end }}
+              {{- if .subPath }}
+              subPath: {{ .subPath }}
+              {{- end }}
+            {{- end }}
+        {{- end }}
+        {{- if .Values.dispatcher.services.helicone.enabled }}
+        - name: helicone-dispatcher
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: {{ include "llm-proxy.dispatcherImage" . }}
+          imagePullPolicy: {{ .Values.dispatcher.image.pullPolicy | default .Values.image.pullPolicy }}
+          command: ["/app/entrypoint.sh"]
+          args: 
+            - "dispatcher"
+            - "--service"
+            - "helicone"
+            {{- if .Values.dispatcher.services.helicone.apiKey }}
+            - "--api-key"
+            - {{ .Values.dispatcher.services.helicone.apiKey | quote }}
+            {{- end }}
+          env:
+            - name: LLM_PROXY_EVENT_BUS
+              value: "redis"
+            - name: REDIS_ADDR
+              value: {{ include "llm-proxy.redisAddr" . | quote }}
+            - name: LOG_LEVEL
+              value: {{ .Values.config.logLevel | quote }}
+            {{- range .Values.env }}
+            - name: {{ .name }}
+              {{- if .value }}
+              value: {{ .value | quote }}
+              {{- else if .valueFrom }}
+              valueFrom:
+                {{- toYaml .valueFrom | nindent 16 }}
+              {{- end }}
+            {{- end }}
+          {{- with .Values.envFrom }}
+          envFrom:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.dispatcher.resources | nindent 12 }}
+          volumeMounts:
+            - name: tmp
+              mountPath: /tmp
+            {{- range .Values.volumeMounts }}
+            - name: {{ .name }}
+              mountPath: {{ .mountPath }}
+              {{- if .readOnly }}
+              readOnly: {{ .readOnly }}
+              {{- end }}
+              {{- if .subPath }}
+              subPath: {{ .subPath }}
+              {{- end }}
+            {{- end }}
+        {{- end }}
+      volumes:
+        {{- if .Values.persistence.enabled }}
+        - name: logs
+          persistentVolumeClaim:
+            claimName: {{ include "llm-proxy.fullname" . }}-logs
+        {{- end }}
+        - name: tmp
+          emptyDir: {}
+        {{- range .Values.volumes }}
+        - name: {{ .name }}
+          {{- if .configMap }}
+          configMap:
+            name: {{ .configMap.name }}
+            {{- if .configMap.items }}
+            items:
+              {{- toYaml .configMap.items | nindent 14 }}
+            {{- end }}
+          {{- else if .secret }}
+          secret:
+            secretName: {{ .secret.secretName }}
+            {{- if .secret.items }}
+            items:
+              {{- toYaml .secret.items | nindent 14 }}
+            {{- end }}
+          {{- else if .emptyDir }}
+          emptyDir: {}
+          {{- end }}
+        {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/deployment.yaml b/deploy/helm/llm-proxy/templates/deployment.yaml
new file mode 100644
index 00000000..19e57058
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/deployment.yaml
@@ -0,0 +1,118 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: server
+spec:
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: 1
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "llm-proxy.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: server
+  template:
+    metadata:
+      annotations:
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "llm-proxy.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: server
+    spec:
+      {{- with (concat .Values.global.imagePullSecrets .Values.imagePullSecrets) }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "llm-proxy.serviceAccountName" . }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: {{ include "llm-proxy.image" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          command: ["/app/entrypoint.sh"]
+          args: ["server"]
+          ports:
+            - name: http
+              containerPort: {{ .Values.service.targetPort }}
+              protocol: TCP
+          env:
+            {{- include "llm-proxy.env" . | nindent 12 }}
+          {{- with .Values.envFrom }}
+          envFrom:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- if .Values.healthChecks.liveness.enabled }}
+          livenessProbe:
+            httpGet:
+              path: {{ .Values.healthChecks.liveness.path }}
+              port: http
+            initialDelaySeconds: {{ .Values.healthChecks.liveness.initialDelaySeconds }}
+            periodSeconds: {{ .Values.healthChecks.liveness.periodSeconds }}
+            timeoutSeconds: {{ .Values.healthChecks.liveness.timeoutSeconds }}
+            failureThreshold: {{ .Values.healthChecks.liveness.failureThreshold }}
+            successThreshold: {{ .Values.healthChecks.liveness.successThreshold }}
+          {{- end }}
+          {{- if .Values.healthChecks.readiness.enabled }}
+          readinessProbe:
+            httpGet:
+              path: {{ .Values.healthChecks.readiness.path }}
+              port: http
+            initialDelaySeconds: {{ .Values.healthChecks.readiness.initialDelaySeconds }}
+            periodSeconds: {{ .Values.healthChecks.readiness.periodSeconds }}
+            timeoutSeconds: {{ .Values.healthChecks.readiness.timeoutSeconds }}
+            failureThreshold: {{ .Values.healthChecks.readiness.failureThreshold }}
+            successThreshold: {{ .Values.healthChecks.readiness.successThreshold }}
+          {{- end }}
+          {{- if .Values.healthChecks.startup.enabled }}
+          startupProbe:
+            httpGet:
+              path: {{ .Values.healthChecks.startup.path }}
+              port: http
+            initialDelaySeconds: {{ .Values.healthChecks.startup.initialDelaySeconds }}
+            periodSeconds: {{ .Values.healthChecks.startup.periodSeconds }}
+            timeoutSeconds: {{ .Values.healthChecks.startup.timeoutSeconds }}
+            failureThreshold: {{ .Values.healthChecks.startup.failureThreshold }}
+            successThreshold: {{ .Values.healthChecks.startup.successThreshold }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            {{- include "llm-proxy.volumeMounts" . | nindent 12 }}
+            - name: config
+              mountPath: /app/config/api_providers.yaml
+              subPath: api_providers.yaml
+              readOnly: true
+            - name: config
+              mountPath: /app/config/health.yaml
+              subPath: health.yaml
+              readOnly: true
+      volumes:
+        {{- include "llm-proxy.volumes" . | nindent 8 }}
+        - name: config
+          configMap:
+            name: {{ include "llm-proxy.fullname" . }}-config
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.topologySpreadConstraints }}
+      topologySpreadConstraints:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/hpa.yaml b/deploy/helm/llm-proxy/templates/hpa.yaml
new file mode 100644
index 00000000..a6ff1282
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/hpa.yaml
@@ -0,0 +1,52 @@
+{{- if .Values.autoscaling.enabled }}
+{{- if semverCompare ">=1.23-0" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: autoscaling/v2
+{{- else -}}
+apiVersion: autoscaling/v2beta1
+{{- end }}
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: hpa
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "llm-proxy.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        {{- if semverCompare ">=1.23-0" .Capabilities.KubeVersion.GitVersion }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+        {{- else }}
+        targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+        {{- end }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        {{- if semverCompare ">=1.23-0" .Capabilities.KubeVersion.GitVersion }}
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+        {{- else }}
+        targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+        {{- end }}
+    {{- end }}
+    {{- with .Values.autoscaling.metrics }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.autoscaling.behavior }}
+  behavior:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/ingress.yaml b/deploy/helm/llm-proxy/templates/ingress.yaml
new file mode 100644
index 00000000..8cc15222
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/ingress.yaml
@@ -0,0 +1,60 @@
+{{- if .Values.ingress.enabled -}}
+{{- $fullName := include "llm-proxy.fullname" . -}}
+{{- $svcPort := .Values.service.port -}}
+{{- if and .Values.ingress.className (not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class")) }}
+  {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
+{{- end }}
+{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: networking.k8s.io/v1
+{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
+apiVersion: networking.k8s.io/v1beta1
+{{- else -}}
+apiVersion: extensions/v1beta1
+{{- end }}
+kind: Ingress
+metadata:
+  name: {{ $fullName }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: ingress
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
+  ingressClassName: {{ .Values.ingress.className }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
+            pathType: {{ .pathType }}
+            {{- end }}
+            backend:
+              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
+              service:
+                name: {{ $fullName }}
+                port:
+                  number: {{ $svcPort }}
+              {{- else }}
+              serviceName: {{ $fullName }}
+              servicePort: {{ $svcPort }}
+              {{- end }}
+          {{- end }}
+    {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/networkpolicy.yaml b/deploy/helm/llm-proxy/templates/networkpolicy.yaml
new file mode 100644
index 00000000..d901d958
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/networkpolicy.yaml
@@ -0,0 +1,63 @@
+{{- if .Values.networkPolicy.enabled }}
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: network-policy
+spec:
+  podSelector:
+    matchLabels:
+      {{- include "llm-proxy.selectorLabels" . | nindent 6 }}
+  policyTypes:
+    {{- range .Values.networkPolicy.policyTypes }}
+    - {{ . }}
+    {{- end }}
+  {{- if has "Ingress" .Values.networkPolicy.policyTypes }}
+  ingress:
+    # Allow ingress traffic on HTTP port
+    - from: []
+      ports:
+        - protocol: TCP
+          port: {{ .Values.service.targetPort }}
+    {{- with .Values.networkPolicy.ingress }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- end }}
+  {{- if has "Egress" .Values.networkPolicy.policyTypes }}
+  egress:
+    # Allow DNS resolution
+    - to: []
+      ports:
+        - protocol: UDP
+          port: 53
+        - protocol: TCP
+          port: 53
+    # Allow access to Redis
+    {{- if .Values.redis.enabled }}
+    - to:
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: redis
+              app.kubernetes.io/instance: {{ .Release.Name }}
+      ports:
+        - protocol: TCP
+          port: 6379
+    {{- else }}
+    # Allow access to external Redis
+    - to: []
+      ports:
+        - protocol: TCP
+          port: {{ .Values.redis.external.port }}
+    {{- end }}
+    # Allow access to OpenAI API
+    - to: []
+      ports:
+        - protocol: TCP
+          port: 443
+    {{- with .Values.networkPolicy.egress }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/persistentvolumeclaim.yaml b/deploy/helm/llm-proxy/templates/persistentvolumeclaim.yaml
new file mode 100644
index 00000000..371da729
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/persistentvolumeclaim.yaml
@@ -0,0 +1,57 @@
+{{- if .Values.persistence.enabled }}
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}-data
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: storage
+  {{- with .Values.persistence.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  accessModes:
+    {{- range .Values.persistence.accessModes }}
+    - {{ . | quote }}
+    {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.persistence.size | quote }}
+  {{- if .Values.persistence.storageClass }}
+  {{- if eq "-" .Values.persistence.storageClass }}
+  storageClassName: ""
+  {{- else }}
+  storageClassName: {{ .Values.persistence.storageClass | quote }}
+  {{- end }}
+  {{- end }}
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}-logs
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: storage
+  {{- with .Values.persistence.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  accessModes:
+    {{- range .Values.persistence.accessModes }}
+    - {{ . | quote }}
+    {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.persistence.size | quote }}
+  {{- if .Values.persistence.storageClass }}
+  {{- if eq "-" .Values.persistence.storageClass }}
+  storageClassName: ""
+  {{- else }}
+  storageClassName: {{ .Values.persistence.storageClass | quote }}
+  {{- end }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/poddisruptionbudget.yaml b/deploy/helm/llm-proxy/templates/poddisruptionbudget.yaml
new file mode 100644
index 00000000..6de5ccab
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/poddisruptionbudget.yaml
@@ -0,0 +1,20 @@
+{{- if .Values.podDisruptionBudget.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: pdb
+spec:
+  {{- if .Values.podDisruptionBudget.minAvailable }}
+  minAvailable: {{ .Values.podDisruptionBudget.minAvailable }}
+  {{- end }}
+  {{- if .Values.podDisruptionBudget.maxUnavailable }}
+  maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "llm-proxy.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: server
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/podmonitor.yaml b/deploy/helm/llm-proxy/templates/podmonitor.yaml
new file mode 100644
index 00000000..fc607d13
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/podmonitor.yaml
@@ -0,0 +1,27 @@
+{{- if .Values.podMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: monitoring
+    {{- with .Values.podMonitor.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.podMonitor.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "llm-proxy.selectorLabels" . | nindent 6 }}
+  podMetricsEndpoints:
+    - port: http
+      path: {{ .Values.podMonitor.path }}
+      interval: {{ .Values.podMonitor.interval }}
+      {{- if .Values.podMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ .Values.podMonitor.scrapeTimeout }}
+      {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/secret.yaml b/deploy/helm/llm-proxy/templates/secret.yaml
new file mode 100644
index 00000000..fb9ce4b5
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/secret.yaml
@@ -0,0 +1,25 @@
+{{- if .Values.secrets.create }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}-secrets
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+type: Opaque
+data:
+  {{- if .Values.config.managementToken }}
+  management-token: {{ .Values.config.managementToken | b64enc }}
+  {{- else }}
+  # NOTE: You must provide a management token either via values or external secret
+  management-token: ""
+  {{- end }}
+  {{- if .Values.config.openai.apiKey }}
+  openai-api-key: {{ .Values.config.openai.apiKey | b64enc }}
+  {{- end }}
+  {{- if and (not .Values.redis.enabled) .Values.redis.external.password }}
+  redis-password: {{ .Values.redis.external.password | b64enc }}
+  {{- end }}
+  {{- if and (eq .Values.config.database.type "postgresql") .Values.config.database.postgresql.password }}
+  database-password: {{ .Values.config.database.postgresql.password | b64enc }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/service.yaml b/deploy/helm/llm-proxy/templates/service.yaml
new file mode 100644
index 00000000..93b237f2
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/service.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: server
+  {{- with .Values.service.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.service.targetPort }}
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "llm-proxy.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: server
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/serviceaccount.yaml b/deploy/helm/llm-proxy/templates/serviceaccount.yaml
new file mode 100644
index 00000000..7691f116
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "llm-proxy.serviceAccountName" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: false
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/servicemonitor.yaml b/deploy/helm/llm-proxy/templates/servicemonitor.yaml
new file mode 100644
index 00000000..009a1a96
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/servicemonitor.yaml
@@ -0,0 +1,28 @@
+{{- if .Values.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "llm-proxy.fullname" . }}
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: monitoring
+    {{- with .Values.serviceMonitor.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.serviceMonitor.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "llm-proxy.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: server
+  endpoints:
+    - port: http
+      path: {{ .Values.serviceMonitor.path }}
+      interval: {{ .Values.serviceMonitor.interval }}
+      {{- if .Values.serviceMonitor.scrapeTimeout }}
+      scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
+      {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/templates/tests/test-connection.yaml b/deploy/helm/llm-proxy/templates/tests/test-connection.yaml
new file mode 100644
index 00000000..f7edca64
--- /dev/null
+++ b/deploy/helm/llm-proxy/templates/tests/test-connection.yaml
@@ -0,0 +1,57 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "llm-proxy.fullname" . }}-test"
+  labels:
+    {{- include "llm-proxy.labels" . | nindent 4 }}
+    app.kubernetes.io/component: test
+  annotations:
+    "helm.sh/hook": test
+    "helm.sh/hook-weight": "1"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  restartPolicy: Never
+  containers:
+    - name: curl
+      image: curlimages/curl:latest
+      imagePullPolicy: IfNotPresent
+      command:
+        - /bin/sh
+        - -c
+        - |
+          set -e
+          echo "Testing LLM Proxy health endpoint..."
+          
+          # Wait for service to be available
+          echo "Waiting for service to be ready..."
+          for i in $(seq 1 30); do
+            if curl -sf {{ include "llm-proxy.fullname" . }}:{{ .Values.service.port }}/health; then
+              echo "Health check passed!"
+              break
+            fi
+            echo "Attempt $i failed, retrying in 2s..."
+            sleep 2
+          done
+          
+          # Test health endpoint
+          echo "Testing /health endpoint..."
+          curl -f {{ include "llm-proxy.fullname" . }}:{{ .Values.service.port }}/health
+          
+          # Test readiness endpoint
+          echo "Testing /ready endpoint..."
+          curl -f {{ include "llm-proxy.fullname" . }}:{{ .Values.service.port }}/ready
+          
+          {{- if .Values.config.monitoring.enableMetrics }}
+          # Test metrics endpoint
+          echo "Testing /metrics endpoint..."
+          curl -f {{ include "llm-proxy.fullname" . }}:{{ .Values.service.port }}/metrics
+          {{- end }}
+          
+          echo "All tests passed!"
+      resources:
+        limits:
+          cpu: 100m
+          memory: 128Mi
+        requests:
+          cpu: 10m
+          memory: 32Mi
\ No newline at end of file
diff --git a/deploy/helm/llm-proxy/values.yaml b/deploy/helm/llm-proxy/values.yaml
new file mode 100644
index 00000000..452896b9
--- /dev/null
+++ b/deploy/helm/llm-proxy/values.yaml
@@ -0,0 +1,355 @@
+# Default values for llm-proxy.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+# Global configuration
+global:
+  # Image registry (e.g., ghcr.io, docker.io)
+  imageRegistry: ""
+  # Global image pull secrets
+  imagePullSecrets: []
+
+# Image configuration
+image:
+  repository: ghcr.io/sofatutor/llm-proxy
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: ""
+
+# Image pull secrets
+imagePullSecrets: []
+
+# Override the default name
+nameOverride: ""
+fullnameOverride: ""
+
+# Service account configuration
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # Annotations to add to the service account
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+
+# Pod annotations
+podAnnotations: {}
+
+# Pod security context
+podSecurityContext:
+  fsGroup: 2000
+  runAsNonRoot: true
+  runAsUser: 1000
+  runAsGroup: 3000
+  seccompProfile:
+    type: RuntimeDefault
+
+# Container security context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop:
+    - ALL
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 1000
+
+# Service configuration
+service:
+  type: ClusterIP
+  port: 8080
+  targetPort: 8080
+  annotations: {}
+
+# Ingress configuration
+ingress:
+  enabled: false
+  className: ""
+  annotations: {}
+    # kubernetes.io/ingress.class: nginx
+    # kubernetes.io/tls-acme: "true"
+    # cert-manager.io/cluster-issuer: letsencrypt-prod
+  hosts:
+    - host: llm-proxy.local
+      paths:
+        - path: /
+          pathType: Prefix
+  tls: []
+  #  - secretName: llm-proxy-tls
+  #    hosts:
+  #      - llm-proxy.local
+
+# Resource limits and requests
+resources:
+  limits:
+    cpu: 1000m
+    memory: 512Mi
+  requests:
+    cpu: 100m
+    memory: 128Mi
+
+# Horizontal Pod Autoscaler
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 10
+  targetCPUUtilizationPercentage: 80
+  targetMemoryUtilizationPercentage: 80
+  # Custom metrics (optional)
+  behavior: {}
+  metrics: []
+
+# Pod Disruption Budget
+podDisruptionBudget:
+  enabled: true
+  minAvailable: 1
+  # maxUnavailable: 1
+
+# Node selection
+nodeSelector: {}
+
+# Tolerations
+tolerations: []
+
+# Affinity
+affinity: {}
+
+# Topology spread constraints
+topologySpreadConstraints: []
+
+# LLM Proxy Configuration
+config:
+  # Management token for admin operations
+  # Required: Generate a strong, unique token for administrative access
+  managementToken: ""
+  
+  # Server configuration
+  listenAddr: ":8080"
+  logLevel: "info"
+  logFormat: "json"
+  
+  # Database configuration
+  database:
+    # Database type: sqlite or postgresql
+    type: "sqlite"
+    # SQLite configuration
+    sqlite:
+      path: "/app/data/llm-proxy.db"
+    # PostgreSQL configuration (when type is postgresql)
+    postgresql:
+      host: ""
+      port: 5432
+      user: ""
+      password: ""
+      database: ""
+      sslmode: "require"
+  
+  # OpenAI API configuration
+  openai:
+    apiUrl: "https://api.openai.com"
+    requestTimeout: "30s"
+    maxRequestSize: "10MB"
+    enableStreaming: true
+  
+  # Security configuration
+  security:
+    corsAllowedOrigins: "*"
+    corsAllowedMethods: "GET,POST,PUT,DELETE,OPTIONS"
+    corsAllowedHeaders: "Authorization,Content-Type"
+    corsMaxAge: 86400
+    maskApiKeys: true
+    validateApiKeyFormat: true
+    defaultTokenLifetime: "30d"
+    defaultTokenRequestLimit: 5000
+  
+  # Rate limiting
+  rateLimiting:
+    globalRateLimit: 100
+    ipRateLimit: 30
+  
+  # Performance tuning
+  performance:
+    maxConcurrentRequests: 100
+    workerPoolSize: 10
+  
+  # Monitoring
+  monitoring:
+    enableMetrics: true
+    metricsPath: "/metrics"
+  
+  # Token cleanup
+  tokenCleanupInterval: "1h"
+  
+  # Observability
+  observability:
+    enabled: true
+    bufferSize: 1000
+
+# Redis configuration
+redis:
+  # Enable Redis dependency
+  enabled: true
+  # External Redis configuration (when enabled: false)
+  external:
+    host: ""
+    port: 6379
+    password: ""
+    database: 0
+  # Bitnami Redis configuration (when enabled: true)
+  auth:
+    enabled: false
+    password: ""
+  master:
+    persistence:
+      enabled: true
+      size: 8Gi
+  replica:
+    replicaCount: 1
+    persistence:
+      enabled: true
+      size: 8Gi
+
+# Event Dispatcher configuration
+dispatcher:
+  # Enable event dispatcher deployment
+  enabled: true
+  # Number of dispatcher replicas
+  replicaCount: 1
+  # Image configuration (inherits from main image if not specified)
+  image:
+    repository: ""
+    tag: ""
+    pullPolicy: ""
+  # Resources for dispatcher pods
+  resources:
+    limits:
+      cpu: 500m
+      memory: 256Mi
+    requests:
+      cpu: 50m
+      memory: 64Mi
+  # Dispatcher services configuration
+  services:
+    file:
+      enabled: true
+      endpoint: "/app/logs/events.jsonl"
+    helicone:
+      enabled: false
+      apiKey: ""
+    # Add more dispatcher services as needed
+
+# Admin UI configuration
+adminUI:
+  # Enable admin UI
+  enabled: true
+  # Path for admin UI
+  path: "/admin"
+  # Base URL for API access
+  apiBaseUrl: ""
+
+# Secrets configuration
+secrets:
+  # Create secrets from values
+  create: true
+  # Use external secret store (set to false if using external secrets)
+  external: false
+  # External secret names (when external: true)
+  externalSecrets:
+    managementToken: ""
+    openaiApiKey: ""
+    databasePassword: ""
+    redisPassword: ""
+
+# Environment variables
+env: []
+  # - name: CUSTOM_VAR
+  #   value: "custom-value"
+
+# Environment variables from secrets/configmaps
+envFrom: []
+  # - secretRef:
+  #     name: custom-secret
+  # - configMapRef:
+  #     name: custom-configmap
+
+# Additional volumes
+volumes: []
+  # - name: custom-volume
+  #   configMap:
+  #     name: custom-configmap
+
+# Additional volume mounts
+volumeMounts: []
+  # - name: custom-volume
+  #   mountPath: /app/custom
+  #   readOnly: true
+
+# Health checks configuration
+healthChecks:
+  liveness:
+    enabled: true
+    initialDelaySeconds: 30
+    periodSeconds: 30
+    timeoutSeconds: 5
+    failureThreshold: 3
+    successThreshold: 1
+    path: "/health"
+  readiness:
+    enabled: true
+    initialDelaySeconds: 5
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 3
+    successThreshold: 1
+    path: "/ready"
+  startup:
+    enabled: true
+    initialDelaySeconds: 5
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 30
+    successThreshold: 1
+    path: "/health"
+
+# Persistence configuration
+persistence:
+  enabled: true
+  # Storage class for persistent volumes
+  storageClass: ""
+  # Access modes
+  accessModes:
+    - ReadWriteOnce
+  # Size of the persistent volume
+  size: 10Gi
+  # Annotations
+  annotations: {}
+  # Data directory mount path
+  dataPath: "/app/data"
+  # Logs directory mount path
+  logsPath: "/app/logs"
+
+# Network policies
+networkPolicy:
+  enabled: false
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress: []
+  egress: []
+
+# Service monitor for Prometheus (if using Prometheus Operator)
+serviceMonitor:
+  enabled: false
+  interval: 30s
+  path: /metrics
+  labels: {}
+  annotations: {}
+
+# Pod monitor for Prometheus (if using Prometheus Operator)
+podMonitor:
+  enabled: false
+  interval: 30s
+  path: /metrics
+  labels: {}
+  annotations: {}
\ No newline at end of file
diff --git a/docs/README.md b/docs/README.md
index e6914915..9baa29e8 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -18,6 +18,11 @@ Start with the main [README](../README.md) for a quick overview, installation, a
 - **[API Configuration](api-configuration.md)** - Configure API providers, endpoints, and security policies
 - **[Security Best Practices](security.md)** - Production security, secrets management, and hardening
 
+## Deployment
+
+- **[Kubernetes with Helm](kubernetes-helm.md)** - Complete Kubernetes deployment guide using Helm charts
+- **[Docker Deployment](../README.md#docker-deployment)** - Container deployment with Docker and Docker Compose
+
 ## Observability & Monitoring
 
 - **[Instrumentation Guide](instrumentation.md)** - Event system, async middleware, and monitoring
diff --git a/docs/kubernetes-helm.md b/docs/kubernetes-helm.md
new file mode 100644
index 00000000..40a7b6db
--- /dev/null
+++ b/docs/kubernetes-helm.md
@@ -0,0 +1,439 @@
+# Kubernetes Deployment with Helm
+
+This document provides comprehensive instructions for deploying LLM Proxy to Kubernetes using Helm charts.
+
+## Table of Contents
+
+- [Prerequisites](#prerequisites)
+- [Quick Start](#quick-start)
+- [Configuration](#configuration)
+- [Installation Examples](#installation-examples)
+- [Upgrade Guide](#upgrade-guide)
+- [Security Considerations](#security-considerations)
+- [Monitoring and Observability](#monitoring-and-observability)
+- [Troubleshooting](#troubleshooting)
+
+## Prerequisites
+
+### Required Tools
+
+- **Kubernetes cluster** (v1.19+)
+- **Helm** (v3.8+)
+- **kubectl** configured for your cluster
+
+### Required Kubernetes Resources
+
+- **Namespace** (recommended: dedicated namespace)
+- **Storage class** for persistent volumes
+- **Ingress controller** (for external access)
+- **Secrets management** (for production deployments)
+
+### Install Helm
+
+```bash
+# Install Helm (if not already installed)
+curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+
+# Verify installation
+helm version
+```
+
+## Quick Start
+
+### 1. Add Helm Repository Dependencies
+
+```bash
+# Add Bitnami repository for Redis dependency
+helm repo add bitnami https://charts.bitnami.com/bitnami
+helm repo update
+```
+
+### 2. Create Namespace
+
+```bash
+kubectl create namespace llm-proxy
+```
+
+### 3. Basic Installation
+
+```bash
+# Navigate to the chart directory
+cd deploy/helm/llm-proxy
+
+# Update dependencies
+helm dependency update
+
+# Install with minimal configuration
+helm install llm-proxy . \
+  --namespace llm-proxy \
+  --set config.managementToken="$(openssl rand -base64 32)" \
+  --wait
+```
+
+### 4. Verify Installation
+
+```bash
+# Check deployment status
+kubectl get pods -n llm-proxy
+
+# Test health endpoint
+kubectl port-forward -n llm-proxy svc/llm-proxy 8080:8080
+curl http://localhost:8080/health
+```
+
+## Configuration
+
+### Core Configuration Options
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `image.repository` | Docker image repository | `ghcr.io/sofatutor/llm-proxy` |
+| `image.tag` | Docker image tag | `latest` |
+| `config.managementToken` | Management API token | `""` (required) |
+| `config.logLevel` | Log level | `info` |
+| `redis.enabled` | Enable Redis dependency | `true` |
+| `dispatcher.enabled` | Enable event dispatcher | `true` |
+| `autoscaling.enabled` | Enable horizontal pod autoscaler | `false` |
+| `ingress.enabled` | Enable ingress | `false` |
+
+### Environment-Specific Values
+
+The chart includes example values files for different environments:
+
+- **Development**: `examples/values-development.yaml`
+- **Production**: `examples/values-production.yaml`
+
+## Installation Examples
+
+### Development Environment
+
+```bash
+helm install llm-proxy . \
+  --namespace llm-proxy \
+  --values examples/values-development.yaml \
+  --set config.managementToken="dev-management-token" \
+  --wait
+```
+
+### Production Environment
+
+```bash
+# Create production secrets first
+kubectl create secret generic llm-proxy-secrets \
+  --namespace llm-proxy \
+  --from-literal=management-token="$(openssl rand -base64 32)" \
+  --from-literal=openai-api-key="sk-your-openai-key"
+
+# Install with production configuration
+helm install llm-proxy . \
+  --namespace llm-proxy \
+  --values examples/values-production.yaml \
+  --set secrets.external=true \
+  --set ingress.hosts[0].host=llm-proxy.yourdomain.com \
+  --wait
+```
+
+### External Redis Configuration
+
+```bash
+helm install llm-proxy . \
+  --namespace llm-proxy \
+  --set redis.enabled=false \
+  --set redis.external.host=redis.example.com \
+  --set redis.external.port=6379 \
+  --set config.managementToken="$(openssl rand -base64 32)" \
+  --wait
+```
+
+### PostgreSQL Database Configuration
+
+```bash
+helm install llm-proxy . \
+  --namespace llm-proxy \
+  --set config.database.type=postgresql \
+  --set config.database.postgresql.host=postgres.example.com \
+  --set config.database.postgresql.user=llmproxy \
+  --set config.database.postgresql.database=llmproxy \
+  --set config.managementToken="$(openssl rand -base64 32)" \
+  --wait
+```
+
+## Upgrade Guide
+
+### Standard Upgrade
+
+```bash
+# Update Helm dependencies
+helm dependency update
+
+# Upgrade deployment
+helm upgrade llm-proxy . \
+  --namespace llm-proxy \
+  --values your-values.yaml \
+  --wait
+```
+
+### Rolling Back
+
+```bash
+# View release history
+helm history llm-proxy -n llm-proxy
+
+# Rollback to previous version
+helm rollback llm-proxy 1 -n llm-proxy
+```
+
+### Zero-Downtime Upgrades
+
+The chart supports zero-downtime upgrades through:
+- **Pod Disruption Budget**: Ensures minimum replicas during upgrades
+- **Rolling Update Strategy**: Gradual pod replacement
+- **Health Checks**: Ensures new pods are healthy before proceeding
+
+## Security Considerations
+
+### Secrets Management
+
+#### Option 1: Kubernetes Secrets (Development)
+
+```bash
+kubectl create secret generic llm-proxy-secrets \
+  --namespace llm-proxy \
+  --from-literal=management-token="$(openssl rand -base64 32)" \
+  --from-literal=openai-api-key="sk-your-key"
+```
+
+#### Option 2: External Secret Operator (Production)
+
+```yaml
+# Install External Secrets Operator first
+helm repo add external-secrets https://charts.external-secrets.io
+helm install external-secrets external-secrets/external-secrets -n external-secrets-system --create-namespace
+
+# Configure SecretStore (example for AWS Secrets Manager)
+apiVersion: external-secrets.io/v1beta1
+kind: SecretStore
+metadata:
+  name: llm-proxy-secrets
+  namespace: llm-proxy
+spec:
+  provider:
+    aws:
+      service: SecretsManager
+      region: us-west-2
+      auth:
+        secretRef:
+          accessKeyId:
+            name: aws-secret
+            key: access-key-id
+          secretAccessKey:
+            name: aws-secret
+            key: secret-access-key
+```
+
+### Network Security
+
+#### Network Policies
+
+Enable network policies for production deployments:
+
+```yaml
+networkPolicy:
+  enabled: true
+  policyTypes:
+    - Ingress
+    - Egress
+```
+
+#### Pod Security Standards
+
+The chart implements security best practices:
+- Non-root containers
+- Read-only root filesystem
+- Dropped capabilities
+- Seccomp profiles
+
+### RBAC
+
+The chart creates a minimal service account with no additional permissions by default. For production:
+
+```yaml
+serviceAccount:
+  create: true
+  annotations:
+    # AWS IRSA example
+    eks.amazonaws.com/role-arn: arn:aws:iam::ACCOUNT:role/llm-proxy-role
+```
+
+## Monitoring and Observability
+
+### Prometheus Integration
+
+Enable Prometheus monitoring:
+
+```yaml
+serviceMonitor:
+  enabled: true
+  labels:
+    release: prometheus-operator
+
+podMonitor:
+  enabled: true
+  labels:
+    release: prometheus-operator
+```
+
+### Health Checks
+
+The chart configures comprehensive health checks:
+
+- **Liveness Probe**: `/health` - Detects unhealthy containers
+- **Readiness Probe**: `/ready` - Ensures pod is ready for traffic  
+- **Startup Probe**: `/health` - Handles slow-starting containers
+
+### Event Dispatchers
+
+Configure event dispatchers for observability:
+
+```yaml
+dispatcher:
+  enabled: true
+  services:
+    file:
+      enabled: true
+      endpoint: "/app/logs/events.jsonl"
+    helicone:
+      enabled: true
+      apiKey: "your-helicone-key"
+```
+
+## Troubleshooting
+
+### Common Issues
+
+#### 1. Pod Stuck in Pending State
+
+```bash
+# Check pod events
+kubectl describe pod -n llm-proxy -l app.kubernetes.io/name=llm-proxy
+
+# Check node resources
+kubectl top nodes
+
+# Check storage
+kubectl get pv,pvc -n llm-proxy
+```
+
+#### 2. Health Check Failures
+
+```bash
+# Check pod logs
+kubectl logs -n llm-proxy -l app.kubernetes.io/name=llm-proxy
+
+# Test health endpoint directly
+kubectl exec -it -n llm-proxy deployment/llm-proxy -- wget -qO- http://localhost:8080/health
+```
+
+#### 3. Redis Connection Issues
+
+```bash
+# Check Redis pod status
+kubectl get pods -n llm-proxy -l app.kubernetes.io/name=redis
+
+# Test Redis connectivity
+kubectl exec -it -n llm-proxy deployment/llm-proxy -- nc -zv redis-host 6379
+```
+
+### Debug Commands
+
+```bash
+# View all resources
+kubectl get all -n llm-proxy
+
+# Check configuration
+helm get values llm-proxy -n llm-proxy
+
+# View rendered templates
+helm template llm-proxy . --debug
+
+# Run Helm tests
+helm test llm-proxy -n llm-proxy
+
+# Check ingress
+kubectl describe ingress -n llm-proxy
+```
+
+### Log Analysis
+
+```bash
+# View application logs
+kubectl logs -n llm-proxy -l app.kubernetes.io/name=llm-proxy -f
+
+# View dispatcher logs
+kubectl logs -n llm-proxy -l app.kubernetes.io/component=dispatcher -f
+
+# Export logs for analysis
+kubectl logs -n llm-proxy deployment/llm-proxy --since=1h > llm-proxy.log
+```
+
+## Advanced Configuration
+
+### Custom Resources Limits
+
+```yaml
+resources:
+  limits:
+    cpu: 2000m
+    memory: 1Gi
+    ephemeral-storage: 2Gi
+  requests:
+    cpu: 500m
+    memory: 512Mi
+    ephemeral-storage: 1Gi
+```
+
+### Node Affinity and Tolerations
+
+```yaml
+nodeSelector:
+  kubernetes.io/arch: amd64
+  node-type: compute
+
+tolerations:
+  - key: "workload"
+    operator: "Equal"
+    value: "llm-proxy"
+    effect: "NoSchedule"
+
+affinity:
+  podAntiAffinity:
+    preferredDuringSchedulingIgnoredDuringExecution:
+    - weight: 100
+      podAffinityTerm:
+        labelSelector:
+          matchExpressions:
+          - key: app.kubernetes.io/name
+            operator: In
+            values:
+            - llm-proxy
+        topologyKey: kubernetes.io/hostname
+```
+
+### Topology Spread Constraints
+
+```yaml
+topologySpreadConstraints:
+  - maxSkew: 1
+    topologyKey: topology.kubernetes.io/zone
+    whenUnsatisfiable: DoNotSchedule
+    labelSelector:
+      matchLabels:
+        app.kubernetes.io/name: llm-proxy
+```
+
+## Support and Resources
+
+- **GitHub Repository**: https://github.com/sofatutor/llm-proxy
+- **Documentation**: https://github.com/sofatutor/llm-proxy/tree/main/docs
+- **Issues**: https://github.com/sofatutor/llm-proxy/issues
+- **Security**: See [Security Documentation](../../security.md)
\ No newline at end of file