Feature/docker support #90

Open · wants to merge 2 commits into base: main
33 changes: 33 additions & 0 deletions DEVELOPMENT.md
@@ -54,6 +54,8 @@ How to set up your local machine.

## Build for Production

### Option 1: Local Build

- **Build the frontend and then the backend**

Compile the TypeScript files and bundle the project:
@@ -89,6 +91,37 @@ How to set up your local machine.

Open [http://localhost:5000](http://localhost:5000) to view it in the browser.

### Option 2: Docker Build

Data Formulator can also be run using Docker, which provides isolated development and production environments.

#### Development Environment

1. Start the development server with hot-reloading:
   ```bash
   docker compose -f docker/docker-compose.yml up data-formulator-dev
   ```

2. Access the development servers:
   - Frontend: http://localhost:5173 (with hot-reloading)
   - Backend: http://localhost:5000

The development environment provides:
- Live reload for both frontend and backend changes
- Source code mounted from host
- Isolated dependencies
- Automatic API key configuration

#### Production Environment

1. Build and start the production server:
   ```bash
   docker compose -f docker/docker-compose.yml up data-formulator
   ```

2. Access Data Formulator at http://localhost:5000

For detailed Docker instructions, configuration options, and troubleshooting, see the [Docker documentation](docker/README.md).

## Usage
See the [Usage section on the README.md page](README.md#usage).
28 changes: 28 additions & 0 deletions docker/.dockerignore
@@ -0,0 +1,28 @@
# Version control
.git
.gitignore

# Environment files
.env
config/api-keys.env

# Python
__pycache__
*.pyc
*.pyo
*.pyd
.Python
env/
venv/
.env/
.venv/

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db
71 changes: 71 additions & 0 deletions docker/Dockerfile
@@ -0,0 +1,71 @@
# Stage 1: Development environment
FROM python:3.11-slim AS development

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Install Node.js and yarn
RUN curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
    && apt-get install -y nodejs \
    && npm install -g yarn

> **Review comment:** Is there any reason why you use Node v18 here? How about using the more recent v20 (maintenance LTS) or v22 (active LTS)? https://nodejs.org/en/about/previous-releases

# Set working directory
WORKDIR /app

# Create directory for API keys
RUN mkdir -p /app/config

# Copy entrypoint script
COPY docker-entrypoint.sh /app/
RUN chmod +x /app/docker-entrypoint.sh

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install Node.js dependencies
COPY package.json yarn.lock ./
RUN yarn install

# Copy source code
COPY . .

# Expose ports for both backend and frontend development
EXPOSE 5000 5173

# Set environment variables
ENV PORT=5000
ENV CONFIG_DIR=/app/config
ENV NODE_ENV=development

# Use entrypoint script
ENTRYPOINT ["/app/docker-entrypoint.sh"]

# Stage 2: Production environment
FROM python:3.11-slim AS production

# Set working directory
WORKDIR /app

# Create directory for API keys
RUN mkdir -p /app/config

# Copy entrypoint script
COPY docker-entrypoint.sh /app/
RUN chmod +x /app/docker-entrypoint.sh

# Install data_formulator package
RUN pip install --no-cache-dir data_formulator

# Expose backend port
EXPOSE 5000

# Set environment variables
ENV PORT=5000
ENV CONFIG_DIR=/app/config
ENV NODE_ENV=production

# Use entrypoint script
ENTRYPOINT ["/app/docker-entrypoint.sh"]
123 changes: 123 additions & 0 deletions docker/README.md
@@ -0,0 +1,123 @@
# Docker Support for Data Formulator

This directory contains Docker configuration for running Data Formulator in both development and production environments.

## Quick Start

1. Clone the repository:
   ```bash
   git clone https://github.com/microsoft/data-formulator.git
   cd data-formulator
   ```

2. Configure your API keys:
   - Copy the template: `cp docker/config/api-keys.env.template docker/config/api-keys.env`
   - Edit `docker/config/api-keys.env` with your API keys:
     ```env
     OPENAI_API_KEY=your_openai_key
     AZURE_API_KEY=your_azure_key
     ANTHROPIC_API_KEY=your_anthropic_key
     ```
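A common slip is starting the containers with the template's placeholder values still in place. A minimal pre-flight check can be sketched with a hypothetical `check_keys` helper (not part of this PR; the `your_..._here` pattern matches the placeholders shipped in the template):

```bash
# check_keys: fail if an env file still contains template placeholders.
# Hypothetical helper, not part of this PR.
check_keys() {
    if grep -q '=your_.*_here$' "$1"; then
        echo "placeholders found in $1"
        return 1
    fi
    echo "ok: $1"
}

# Demo against throwaway files shaped like the template
tmp=$(mktemp)
printf 'OPENAI_API_KEY=your_openai_key_here\n' > "$tmp"
check_keys "$tmp" || true       # reports the leftover placeholder
printf 'OPENAI_API_KEY=sk-abc123\n' > "$tmp"
check_keys "$tmp"               # passes once a real value is set
rm -f "$tmp"
```

Run it as `check_keys docker/config/api-keys.env` before bringing the services up.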

## Development Mode

Development mode provides hot-reloading for both frontend and backend changes.

1. Start the development environment:
   ```bash
   docker compose -f docker/docker-compose.yml up data-formulator-dev
   ```

2. Access the development servers:
   - Frontend: http://localhost:5173 (with hot-reloading)
   - Backend: http://localhost:5000

3. Development features:
   - Live reload on frontend changes
   - Source code mounted from host
   - Node modules persisted in a Docker volume
   - Both frontend and backend servers running

## Production Mode

Production mode runs the optimized build for deployment.

1. Start the production environment:
   ```bash
   docker compose -f docker/docker-compose.yml up data-formulator
   ```

2. Access Data Formulator at http://localhost:5000

## Configuration

### Environment Variables

- `PORT`: The port to run Data Formulator on (default: 5000)
- `NODE_ENV`: Environment mode ('development' or 'production')
- `OPENAI_API_KEY`: Your OpenAI API key
- `AZURE_API_KEY`: Your Azure API key
- `ANTHROPIC_API_KEY`: Your Anthropic API key

### Custom Port Configuration

1. Update the ports in docker-compose.yml:
   ```yaml
   ports:
     - "8080:5000"  # For production
     # For development:
     - "8080:5000"  # Backend
     - "5173:5173"  # Frontend dev server
   ```

2. Or use an environment variable:
   ```bash
   PORT=8080 docker compose -f docker/docker-compose.yml up data-formulator
   ```
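Both paths rely on the same fallback: the server reads `PORT` from the environment and defaults to 5000 when it is unset. The pattern can be sketched with a hypothetical `resolve_port` helper (the default value is the one stated in the configuration section above):

```bash
# resolve_port mirrors the assumed fallback: use $PORT when set,
# otherwise default to 5000.
resolve_port() {
    echo "${PORT:-5000}"
}

unset PORT
echo "default: $(resolve_port)"    # default: 5000
PORT=8080
echo "override: $(resolve_port)"   # override: 8080
```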

## Building

### Development Build
```bash
docker compose -f docker/docker-compose.yml build data-formulator-dev
```

### Production Build
```bash
docker compose -f docker/docker-compose.yml build data-formulator
```

## Testing

1. Run tests in the development container:
   ```bash
   docker compose -f docker/docker-compose.yml run --rm data-formulator-dev yarn test
   ```

## Troubleshooting

1. Permission issues:
   - Ensure the config directory exists: `mkdir -p docker/config`
   - Set proper permissions: `chmod 644 docker/config/api-keys.env`

2. Container startup issues:
   - Check the logs: `docker compose -f docker/docker-compose.yml logs`
   - Verify the API keys in `docker/config/api-keys.env`
   - Ensure no conflicting services are running on ports 5000 or 5173

3. Development mode issues:
   - Clear the node_modules volume: `docker compose -f docker/docker-compose.yml down -v`
   - Rebuild the development container: `docker compose -f docker/docker-compose.yml build --no-cache data-formulator-dev`

4. Hot reload not working:
   - Ensure proper volume mounts in docker-compose.yml
   - Check the frontend console for errors
   - Verify file permissions on the mounted directories

## Contributing

When contributing Docker-related changes:
1. Test both development and production builds
2. Verify hot-reloading functionality
3. Update documentation for any new features or changes
4. Follow the project's coding standards
11 changes: 11 additions & 0 deletions docker/config/api-keys.env.template
@@ -0,0 +1,11 @@
# Data Formulator API Keys
# Rename this file to api-keys.env and replace with your actual keys

# OpenAI API Key
OPENAI_API_KEY=your_openai_key_here

# Azure API Key
AZURE_API_KEY=your_azure_key_here

# Anthropic API Key
ANTHROPIC_API_KEY=your_anthropic_key_here
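Filling the template can also be scripted. A hedged sketch (the `sed` pattern assumes the `your_..._here` placeholder format shown above, and `sk-example` is a stand-in value, not a real key):

```bash
# Work in a throwaway directory so nothing real is touched
tmp=$(mktemp -d)
printf '# OpenAI API Key\nOPENAI_API_KEY=your_openai_key_here\n' > "$tmp/api-keys.env.template"

# Replace the placeholder with a value (sk-example is a stand-in)
sed 's/=your_.*_here$/=sk-example/' "$tmp/api-keys.env.template" > "$tmp/api-keys.env"
grep 'OPENAI_API_KEY' "$tmp/api-keys.env"   # OPENAI_API_KEY=sk-example
rm -r "$tmp"
```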
47 changes: 47 additions & 0 deletions docker/docker-compose.yml
@@ -0,0 +1,47 @@
version: '3.8'

services:
  # Development service with hot-reloading
  data-formulator-dev:
    build:
      context: ..
      dockerfile: docker/Dockerfile
      target: development
    ports:
      - "5000:5000"  # Backend port
      - "5173:5173"  # Frontend development port
    volumes:
      - ..:/app                        # Mount source code for development
      - ./config:/app/config           # Mount config directory
      - node_modules:/app/node_modules # Persist node_modules
    environment:
      - PORT=5000
      - NODE_ENV=development
      # Uncomment and set your API keys as needed:
      # - OPENAI_API_KEY=your_key_here
      # - AZURE_API_KEY=your_key_here
      # - ANTHROPIC_API_KEY=your_key_here
    command: sh -c "yarn && yarn start"  # Start in development mode
    restart: unless-stopped

  # Production service
  data-formulator:
    build:
      context: ..
      dockerfile: docker/Dockerfile
      target: production
    ports:
      - "5000:5000"
    volumes:
      - ./config:/app/config
    environment:
      - PORT=5000
      - NODE_ENV=production
      # Uncomment and set your API keys as needed:
      # - OPENAI_API_KEY=your_key_here
      # - AZURE_API_KEY=your_key_here
      # - ANTHROPIC_API_KEY=your_key_here
    restart: unless-stopped

volumes:
  node_modules:  # Named volume for node_modules
34 changes: 34 additions & 0 deletions docker/docker-entrypoint.sh
@@ -0,0 +1,34 @@
#!/bin/bash
set -e

# Create api-keys.env if it doesn't exist
if [ ! -f "$CONFIG_DIR/api-keys.env" ]; then
    touch "$CONFIG_DIR/api-keys.env"
fi

# Export environment variables from api-keys.env, skipping comment
# and blank lines (a bare `export $(cat ... | xargs)` would choke on
# the template's `#` comments, and a plain `export` on an empty file
# would just print the current exports)
if [ -s "$CONFIG_DIR/api-keys.env" ]; then
    export $(grep -v '^[[:space:]]*#' "$CONFIG_DIR/api-keys.env" | grep -v '^[[:space:]]*$' | xargs)
fi

# Function to start the development server
start_dev() {
    echo "Starting Data Formulator in development mode..."
    # Start the backend server in the background
    python -m data_formulator --port "$PORT" &
    # Start the frontend development server in the foreground
    yarn start
}

# Function to start the production server
start_prod() {
    echo "Starting Data Formulator in production mode..."
    exec python -m data_formulator --port "$PORT"
}

# Check environment and start the appropriate server
if [ "$NODE_ENV" = "development" ]; then
    start_dev
else
    start_prod
fi
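The env-file loading step deserves care: the template ships with `#` comment lines, and a bare `export $(cat file | xargs)` would try to export the comment words too. A hedged sketch of safe loading (`load_env` is a hypothetical helper, not part of this PR; it assumes values contain no spaces):

```bash
# load_env: export KEY=VALUE pairs from an env file, skipping
# comment lines and blank lines first. Values must not contain
# spaces for this word-splitting approach to work.
load_env() {
    export $(grep -v '^[[:space:]]*#' "$1" | grep -v '^[[:space:]]*$' | xargs)
}

tmp=$(mktemp)
printf '# a comment line\nOPENAI_API_KEY=sk-test\n\n' > "$tmp"
load_env "$tmp"
echo "$OPENAI_API_KEY"   # sk-test
rm -f "$tmp"
```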